/* Autogenerated: src/ExtractionOCaml/word_by_word_montgomery --static p256 '2^256 - 2^224 + 2^192 + 2^96 - 1' 32 mul square add sub opp from_montgomery nonzero selectznz to_bytes from_bytes */
/* curve description: p256 */
/* requested operations: mul, square, add, sub, opp, from_montgomery, nonzero, selectznz, to_bytes, from_bytes */
/* m = 0xffffffff00000001000000000000000000000000ffffffffffffffffffffffff (from "2^256 - 2^224 + 2^192 + 2^96 - 1") */
/* machine_wordsize = 32 (from "32") */
/*                                                                    */
/* NOTE: In addition to the bounds specified above each function, all */
/*   functions synthesized for this Montgomery arithmetic require the */
/*   input to be strictly less than the prime modulus (m), and also   */
/*   require the input to be in the unique saturated representation.  */
/*   All functions also ensure that these two properties are true of  */
/*   return values.                                                   */

#include <stdint.h>
typedef unsigned char fiat_p256_uint1;
typedef signed char fiat_p256_int1;

#if (-1 & 3) != 3
#error "This code only works on a two's complement system"
#endif


/*
 * The function fiat_p256_addcarryx_u32 is an addition with carry.
 * Postconditions:
 *   out1 = (arg1 + arg2 + arg3) mod 2^32
 *   out2 = ⌊(arg1 + arg2 + arg3) / 2^32⌋
 *
 * Input Bounds:
 *   arg1: [0x0 ~> 0x1]
 *   arg2: [0x0 ~> 0xffffffff]
 *   arg3: [0x0 ~> 0xffffffff]
 * Output Bounds:
 *   out1: [0x0 ~> 0xffffffff]
 *   out2: [0x0 ~> 0x1]
 */
static void fiat_p256_addcarryx_u32(uint32_t* out1, fiat_p256_uint1* out2, fiat_p256_uint1 arg1, uint32_t arg2, uint32_t arg3) {
  uint64_t x1 = ((arg1 + (uint64_t)arg2) + arg3);
  uint32_t x2 = (uint32_t)(x1 & UINT32_C(0xffffffff));
  fiat_p256_uint1 x3 = (fiat_p256_uint1)(x1 >> 32);
  *out1 = x2;
  *out2 = x3;
}

/*
 * The function fiat_p256_subborrowx_u32 is a subtraction with borrow.
 * Postconditions:
 *   out1 = (-arg1 + arg2 + -arg3) mod 2^32
 *   out2 = -⌊(-arg1 + arg2 + -arg3) / 2^32⌋
 *
 * Input Bounds:
 *   arg1: [0x0 ~> 0x1]
 *   arg2: [0x0 ~> 0xffffffff]
 *   arg3: [0x0 ~> 0xffffffff]
 * Output Bounds:
 *   out1: [0x0 ~> 0xffffffff]
 *   out2: [0x0 ~> 0x1]
 */
static void fiat_p256_subborrowx_u32(uint32_t* out1, fiat_p256_uint1* out2, fiat_p256_uint1 arg1, uint32_t arg2, uint32_t arg3) {
  int64_t x1 = ((arg2 - (int64_t)arg1) - arg3);
  fiat_p256_int1 x2 = (fiat_p256_int1)(x1 >> 32);
  uint32_t x3 = (uint32_t)(x1 & UINT32_C(0xffffffff));
  *out1 = x3;
  *out2 = (fiat_p256_uint1)(0x0 - x2);
}

/*
 * The function fiat_p256_mulx_u32 is a multiplication, returning the full double-width result.
 * Postconditions:
 *   out1 = (arg1 * arg2) mod 2^32
 *   out2 = ⌊arg1 * arg2 / 2^32⌋
 *
 * Input Bounds:
 *   arg1: [0x0 ~> 0xffffffff]
 *   arg2: [0x0 ~> 0xffffffff]
 * Output Bounds:
 *   out1: [0x0 ~> 0xffffffff]
 *   out2: [0x0 ~> 0xffffffff]
 */
static void fiat_p256_mulx_u32(uint32_t* out1, uint32_t* out2, uint32_t arg1, uint32_t arg2) {
  uint64_t x1 = ((uint64_t)arg1 * arg2);
  uint32_t x2 = (uint32_t)(x1 & UINT32_C(0xffffffff));
  uint32_t x3 = (uint32_t)(x1 >> 32);
  *out1 = x2;
  *out2 = x3;
}

/*
 * The function fiat_p256_cmovznz_u32 is a single-word conditional move.
 * Postconditions:
 *   out1 = (if arg1 = 0 then arg2 else arg3)
 *
 * Input Bounds:
 *   arg1: [0x0 ~> 0x1]
 *   arg2: [0x0 ~> 0xffffffff]
 *   arg3: [0x0 ~> 0xffffffff]
 * Output Bounds:
 *   out1: [0x0 ~> 0xffffffff]
 */
static void fiat_p256_cmovznz_u32(uint32_t* out1, fiat_p256_uint1 arg1, uint32_t arg2, uint32_t arg3) {
  fiat_p256_uint1 x1 = (!(!arg1));
  uint32_t x2 = ((fiat_p256_int1)(0x0 - x1) & UINT32_C(0xffffffff));
  // Note this line has been patched from the synthesized code to add value
  // barriers.
  //
  // Clang recognizes this pattern as a select. While it usually transforms it
  // to a cmov, it sometimes further transforms it into a branch, which we do
  // not want.
  uint32_t x3 = ((value_barrier_u32(x2) & arg3) | (value_barrier_u32(~x2) & arg2));
  *out1 = x3;
}

/*
 * The function fiat_p256_mul multiplies two field elements in the Montgomery domain.
 * Preconditions:
 *   0 ≤ eval arg1 < m
 *   0 ≤ eval arg2 < m
 * Postconditions:
 *   eval (from_montgomery out1) mod m = (eval (from_montgomery arg1) * eval (from_montgomery arg2)) mod m
 *   0 ≤ eval out1 < m
 *
 * Input Bounds:
 *   arg1: [[0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff]]
 *   arg2: [[0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff]]
 * Output Bounds:
 *   out1: [[0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff]]
 */
static void fiat_p256_mul(uint32_t out1[8], const uint32_t arg1[8], const uint32_t arg2[8]) {
  uint32_t x1 = (arg1[1]);
  uint32_t x2 = (arg1[2]);
  uint32_t x3 = (arg1[3]);
  uint32_t x4 = (arg1[4]);
  uint32_t x5 = (arg1[5]);
  uint32_t x6 = (arg1[6]);
  uint32_t x7 = (arg1[7]);
  uint32_t x8 = (arg1[0]);
  uint32_t x9;
  uint32_t x10;
  fiat_p256_mulx_u32(&x9, &x10, x8, (arg2[7]));
  uint32_t x11;
  uint32_t x12;
  fiat_p256_mulx_u32(&x11, &x12, x8, (arg2[6]));
  uint32_t x13;
  uint32_t x14;
  fiat_p256_mulx_u32(&x13, &x14, x8, (arg2[5]));
  uint32_t x15;
  uint32_t x16;
  fiat_p256_mulx_u32(&x15, &x16, x8, (arg2[4]));
  uint32_t x17;
  uint32_t x18;
  fiat_p256_mulx_u32(&x17, &x18, x8, (arg2[3]));
  uint32_t x19;
  uint32_t x20;
  fiat_p256_mulx_u32(&x19, &x20, x8, (arg2[2]));
  uint32_t x21;
  uint32_t x22;
  fiat_p256_mulx_u32(&x21, &x22, x8, (arg2[1]));
  uint32_t x23;
  uint32_t x24;
  fiat_p256_mulx_u32(&x23, &x24, x8, (arg2[0]));
  uint32_t x25;
  fiat_p256_uint1 x26;
  fiat_p256_addcarryx_u32(&x25, &x26, 0x0, x24, x21);
  uint32_t x27;
  fiat_p256_uint1 x28;
  fiat_p256_addcarryx_u32(&x27, &x28, x26, x22, x19);
  uint32_t x29;
  fiat_p256_uint1 x30;
  fiat_p256_addcarryx_u32(&x29, &x30, x28, x20, x17);
  uint32_t x31;
  fiat_p256_uint1 x32;
  fiat_p256_addcarryx_u32(&x31, &x32, x30, x18, x15);
  uint32_t x33;
  fiat_p256_uint1 x34;
  fiat_p256_addcarryx_u32(&x33, &x34, x32, x16, x13);
  uint32_t x35;
  fiat_p256_uint1 x36;
  fiat_p256_addcarryx_u32(&x35, &x36, x34, x14, x11);
  uint32_t x37;
  fiat_p256_uint1 x38;
  fiat_p256_addcarryx_u32(&x37, &x38, x36, x12, x9);
  uint32_t x39 = (x38 + x10);
  uint32_t x40;
  uint32_t x41;
  fiat_p256_mulx_u32(&x40, &x41, x23, UINT32_C(0xffffffff));
  uint32_t x42;
  uint32_t x43;
  fiat_p256_mulx_u32(&x42, &x43, x23, UINT32_C(0xffffffff));
  uint32_t x44;
  uint32_t x45;
  fiat_p256_mulx_u32(&x44, &x45, x23, UINT32_C(0xffffffff));
  uint32_t x46;
  uint32_t x47;
  fiat_p256_mulx_u32(&x46, &x47, x23, UINT32_C(0xffffffff));
  uint32_t x48;
  fiat_p256_uint1 x49;
  fiat_p256_addcarryx_u32(&x48, &x49, 0x0, x47, x44);
  uint32_t x50;
  fiat_p256_uint1 x51;
  fiat_p256_addcarryx_u32(&x50, &x51, x49, x45, x42);
  uint32_t x52 = (x51 + x43);
  uint32_t x53;
  fiat_p256_uint1 x54;
  fiat_p256_addcarryx_u32(&x53, &x54, 0x0, x23, x46);
  uint32_t x55;
  fiat_p256_uint1 x56;
  fiat_p256_addcarryx_u32(&x55, &x56, x54, x25, x48);
  uint32_t x57;
  fiat_p256_uint1 x58;
  fiat_p256_addcarryx_u32(&x57, &x58, x56, x27, x50);
  uint32_t x59;
  fiat_p256_uint1 x60;
  fiat_p256_addcarryx_u32(&x59, &x60, x58, x29, x52);
  uint32_t x61;
  fiat_p256_uint1 x62;
  fiat_p256_addcarryx_u32(&x61, &x62, x60, x31, 0x0);
  uint32_t x63;
  fiat_p256_uint1 x64;
  fiat_p256_addcarryx_u32(&x63, &x64, x62, x33, 0x0);
  uint32_t x65;
  fiat_p256_uint1 x66;
  fiat_p256_addcarryx_u32(&x65, &x66, x64, x35, x23);
  uint32_t x67;
  fiat_p256_uint1 x68;
  fiat_p256_addcarryx_u32(&x67, &x68, x66, x37, x40);
  uint32_t x69;
  fiat_p256_uint1 x70;
  fiat_p256_addcarryx_u32(&x69, &x70, x68, x39, x41);
  uint32_t x71;
  uint32_t x72;
  fiat_p256_mulx_u32(&x71, &x72, x1, (arg2[7]));
  uint32_t x73;
  uint32_t x74;
  fiat_p256_mulx_u32(&x73, &x74, x1, (arg2[6]));
  uint32_t x75;
  uint32_t x76;
  fiat_p256_mulx_u32(&x75, &x76, x1, (arg2[5]));
  uint32_t x77;
  uint32_t x78;
  fiat_p256_mulx_u32(&x77, &x78, x1, (arg2[4]));
  uint32_t x79;
  uint32_t x80;
  fiat_p256_mulx_u32(&x79, &x80, x1, (arg2[3]));
  uint32_t x81;
  uint32_t x82;
  fiat_p256_mulx_u32(&x81, &x82, x1, (arg2[2]));
  uint32_t x83;
  uint32_t x84;
  fiat_p256_mulx_u32(&x83, &x84, x1, (arg2[1]));
  uint32_t x85;
  uint32_t x86;
  fiat_p256_mulx_u32(&x85, &x86, x1, (arg2[0]));
  uint32_t x87;
  fiat_p256_uint1 x88;
  fiat_p256_addcarryx_u32(&x87, &x88, 0x0, x86, x83);
  uint32_t x89;
  fiat_p256_uint1 x90;
  fiat_p256_addcarryx_u32(&x89, &x90, x88, x84, x81);
  uint32_t x91;
  fiat_p256_uint1 x92;
  fiat_p256_addcarryx_u32(&x91, &x92, x90, x82, x79);
  uint32_t x93;
  fiat_p256_uint1 x94;
  fiat_p256_addcarryx_u32(&x93, &x94, x92, x80, x77);
  uint32_t x95;
  fiat_p256_uint1 x96;
  fiat_p256_addcarryx_u32(&x95, &x96, x94, x78, x75);
  uint32_t x97;
  fiat_p256_uint1 x98;
  fiat_p256_addcarryx_u32(&x97, &x98, x96, x76, x73);
  uint32_t x99;
  fiat_p256_uint1 x100;
  fiat_p256_addcarryx_u32(&x99, &x100, x98, x74, x71);
  uint32_t x101 = (x100 + x72);
  uint32_t x102;
  fiat_p256_uint1 x103;
  fiat_p256_addcarryx_u32(&x102, &x103, 0x0, x55, x85);
  uint32_t x104;
  fiat_p256_uint1 x105;
  fiat_p256_addcarryx_u32(&x104, &x105, x103, x57, x87);
  uint32_t x106;
  fiat_p256_uint1 x107;
  fiat_p256_addcarryx_u32(&x106, &x107, x105, x59, x89);
  uint32_t x108;
  fiat_p256_uint1 x109;
  fiat_p256_addcarryx_u32(&x108, &x109, x107, x61, x91);
  uint32_t x110;
  fiat_p256_uint1 x111;
  fiat_p256_addcarryx_u32(&x110, &x111, x109, x63, x93);
  uint32_t x112;
  fiat_p256_uint1 x113;
  fiat_p256_addcarryx_u32(&x112, &x113, x111, x65, x95);
  uint32_t x114;
  fiat_p256_uint1 x115;
  fiat_p256_addcarryx_u32(&x114, &x115, x113, x67, x97);
  uint32_t x116;
  fiat_p256_uint1 x117;
  fiat_p256_addcarryx_u32(&x116, &x117, x115, x69, x99);
  uint32_t x118;
  fiat_p256_uint1 x119;
  fiat_p256_addcarryx_u32(&x118, &x119, x117, x70, x101);
  uint32_t x120;
  uint32_t x121;
  fiat_p256_mulx_u32(&x120, &x121, x102, UINT32_C(0xffffffff));
  uint32_t x122;
  uint32_t x123;
  fiat_p256_mulx_u32(&x122, &x123, x102, UINT32_C(0xffffffff));
  uint32_t x124;
  uint32_t x125;
  fiat_p256_mulx_u32(&x124, &x125, x102, UINT32_C(0xffffffff));
  uint32_t x126;
  uint32_t x127;
  fiat_p256_mulx_u32(&x126, &x127, x102, UINT32_C(0xffffffff));
  uint32_t x128;
  fiat_p256_uint1 x129;
  fiat_p256_addcarryx_u32(&x128, &x129, 0x0, x127, x124);
  uint32_t x130;
  fiat_p256_uint1 x131;
  fiat_p256_addcarryx_u32(&x130, &x131, x129, x125, x122);
  uint32_t x132 = (x131 + x123);
  uint32_t x133;
  fiat_p256_uint1 x134;
  fiat_p256_addcarryx_u32(&x133, &x134, 0x0, x102, x126);
  uint32_t x135;
  fiat_p256_uint1 x136;
  fiat_p256_addcarryx_u32(&x135, &x136, x134, x104, x128);
  uint32_t x137;
  fiat_p256_uint1 x138;
  fiat_p256_addcarryx_u32(&x137, &x138, x136, x106, x130);
  uint32_t x139;
  fiat_p256_uint1 x140;
  fiat_p256_addcarryx_u32(&x139, &x140, x138, x108, x132);
  uint32_t x141;
  fiat_p256_uint1 x142;
  fiat_p256_addcarryx_u32(&x141, &x142, x140, x110, 0x0);
  uint32_t x143;
  fiat_p256_uint1 x144;
  fiat_p256_addcarryx_u32(&x143, &x144, x142, x112, 0x0);
  uint32_t x145;
  fiat_p256_uint1 x146;
  fiat_p256_addcarryx_u32(&x145, &x146, x144, x114, x102);
  uint32_t x147;
  fiat_p256_uint1 x148;
  fiat_p256_addcarryx_u32(&x147, &x148, x146, x116, x120);
  uint32_t x149;
  fiat_p256_uint1 x150;
  fiat_p256_addcarryx_u32(&x149, &x150, x148, x118, x121);
  uint32_t x151 = ((uint32_t)x150 + x119);
  uint32_t x152;
  uint32_t x153;
  fiat_p256_mulx_u32(&x152, &x153, x2, (arg2[7]));
  uint32_t x154;
  uint32_t x155;
  fiat_p256_mulx_u32(&x154, &x155, x2, (arg2[6]));
  uint32_t x156;
  uint32_t x157;
  fiat_p256_mulx_u32(&x156, &x157, x2, (arg2[5]));
  uint32_t x158;
  uint32_t x159;
  fiat_p256_mulx_u32(&x158, &x159, x2, (arg2[4]));
  uint32_t x160;
  uint32_t x161;
  fiat_p256_mulx_u32(&x160, &x161, x2, (arg2[3]));
  uint32_t x162;
  uint32_t x163;
  fiat_p256_mulx_u32(&x162, &x163, x2, (arg2[2]));
  uint32_t x164;
  uint32_t x165;
  fiat_p256_mulx_u32(&x164, &x165, x2, (arg2[1]));
  uint32_t x166;
  uint32_t x167;
  fiat_p256_mulx_u32(&x166, &x167, x2, (arg2[0]));
  uint32_t x168;
  fiat_p256_uint1 x169;
  fiat_p256_addcarryx_u32(&x168, &x169, 0x0, x167, x164);
  uint32_t x170;
  fiat_p256_uint1 x171;
  fiat_p256_addcarryx_u32(&x170, &x171, x169, x165, x162);
  uint32_t x172;
  fiat_p256_uint1 x173;
  fiat_p256_addcarryx_u32(&x172, &x173, x171, x163, x160);
  uint32_t x174;
  fiat_p256_uint1 x175;
  fiat_p256_addcarryx_u32(&x174, &x175, x173, x161, x158);
  uint32_t x176;
  fiat_p256_uint1 x177;
  fiat_p256_addcarryx_u32(&x176, &x177, x175, x159, x156);
  uint32_t x178;
  fiat_p256_uint1 x179;
  fiat_p256_addcarryx_u32(&x178, &x179, x177, x157, x154);
  uint32_t x180;
  fiat_p256_uint1 x181;
  fiat_p256_addcarryx_u32(&x180, &x181, x179, x155, x152);
  uint32_t x182 = (x181 + x153);
  uint32_t x183;
  fiat_p256_uint1 x184;
  fiat_p256_addcarryx_u32(&x183, &x184, 0x0, x135, x166);
  uint32_t x185;
  fiat_p256_uint1 x186;
  fiat_p256_addcarryx_u32(&x185, &x186, x184, x137, x168);
  uint32_t x187;
  fiat_p256_uint1 x188;
  fiat_p256_addcarryx_u32(&x187, &x188, x186, x139, x170);
  uint32_t x189;
  fiat_p256_uint1 x190;
  fiat_p256_addcarryx_u32(&x189, &x190, x188, x141, x172);
  uint32_t x191;
  fiat_p256_uint1 x192;
  fiat_p256_addcarryx_u32(&x191, &x192, x190, x143, x174);
  uint32_t x193;
  fiat_p256_uint1 x194;
  fiat_p256_addcarryx_u32(&x193, &x194, x192, x145, x176);
  uint32_t x195;
  fiat_p256_uint1 x196;
  fiat_p256_addcarryx_u32(&x195, &x196, x194, x147, x178);
  uint32_t x197;
  fiat_p256_uint1 x198;
  fiat_p256_addcarryx_u32(&x197, &x198, x196, x149, x180);
  uint32_t x199;
  fiat_p256_uint1 x200;
  fiat_p256_addcarryx_u32(&x199, &x200, x198, x151, x182);
  uint32_t x201;
  uint32_t x202;
  fiat_p256_mulx_u32(&x201, &x202, x183, UINT32_C(0xffffffff));
  uint32_t x203;
  uint32_t x204;
  fiat_p256_mulx_u32(&x203, &x204, x183, UINT32_C(0xffffffff));
  uint32_t x205;
  uint32_t x206;
  fiat_p256_mulx_u32(&x205, &x206, x183, UINT32_C(0xffffffff));
  uint32_t x207;
  uint32_t x208;
  fiat_p256_mulx_u32(&x207, &x208, x183, UINT32_C(0xffffffff));
  uint32_t x209;
  fiat_p256_uint1 x210;
  fiat_p256_addcarryx_u32(&x209, &x210, 0x0, x208, x205);
  uint32_t x211;
  fiat_p256_uint1 x212;
  fiat_p256_addcarryx_u32(&x211, &x212, x210, x206, x203);
  uint32_t x213 = (x212 + x204);
  uint32_t x214;
  fiat_p256_uint1 x215;
  fiat_p256_addcarryx_u32(&x214, &x215, 0x0, x183, x207);
  uint32_t x216;
  fiat_p256_uint1 x217;
  fiat_p256_addcarryx_u32(&x216, &x217, x215, x185, x209);
  uint32_t x218;
  fiat_p256_uint1 x219;
  fiat_p256_addcarryx_u32(&x218, &x219, x217, x187, x211);
  uint32_t x220;
  fiat_p256_uint1 x221;
  fiat_p256_addcarryx_u32(&x220, &x221, x219, x189, x213);
  uint32_t x222;
  fiat_p256_uint1 x223;
  fiat_p256_addcarryx_u32(&x222, &x223, x221, x191, 0x0);
  uint32_t x224;
  fiat_p256_uint1 x225;
  fiat_p256_addcarryx_u32(&x224, &x225, x223, x193, 0x0);
  uint32_t x226;
  fiat_p256_uint1 x227;
  fiat_p256_addcarryx_u32(&x226, &x227, x225, x195, x183);
  uint32_t x228;
  fiat_p256_uint1 x229;
  fiat_p256_addcarryx_u32(&x228, &x229, x227, x197, x201);
  uint32_t x230;
  fiat_p256_uint1 x231;
  fiat_p256_addcarryx_u32(&x230, &x231, x229, x199, x202);
  uint32_t x232 = ((uint32_t)x231 + x200);
  uint32_t x233;
  uint32_t x234;
  fiat_p256_mulx_u32(&x233, &x234, x3, (arg2[7]));
  uint32_t x235;
  uint32_t x236;
  fiat_p256_mulx_u32(&x235, &x236, x3, (arg2[6]));
  uint32_t x237;
  uint32_t x238;
  fiat_p256_mulx_u32(&x237, &x238, x3, (arg2[5]));
  uint32_t x239;
  uint32_t x240;
  fiat_p256_mulx_u32(&x239, &x240, x3, (arg2[4]));
  uint32_t x241;
  uint32_t x242;
  fiat_p256_mulx_u32(&x241, &x242, x3, (arg2[3]));
  uint32_t x243;
  uint32_t x244;
  fiat_p256_mulx_u32(&x243, &x244, x3, (arg2[2]));
  uint32_t x245;
  uint32_t x246;
  fiat_p256_mulx_u32(&x245, &x246, x3, (arg2[1]));
  uint32_t x247;
  uint32_t x248;
  fiat_p256_mulx_u32(&x247, &x248, x3, (arg2[0]));
  uint32_t x249;
  fiat_p256_uint1 x250;
  fiat_p256_addcarryx_u32(&x249, &x250, 0x0, x248, x245);
  uint32_t x251;
  fiat_p256_uint1 x252;
  fiat_p256_addcarryx_u32(&x251, &x252, x250, x246, x243);
  uint32_t x253;
  fiat_p256_uint1 x254;
  fiat_p256_addcarryx_u32(&x253, &x254, x252, x244, x241);
  uint32_t x255;
  fiat_p256_uint1 x256;
  fiat_p256_addcarryx_u32(&x255, &x256, x254, x242, x239);
  uint32_t x257;
  fiat_p256_uint1 x258;
  fiat_p256_addcarryx_u32(&x257, &x258, x256, x240, x237);
  uint32_t x259;
  fiat_p256_uint1 x260;
  fiat_p256_addcarryx_u32(&x259, &x260, x258, x238, x235);
  uint32_t x261;
  fiat_p256_uint1 x262;
  fiat_p256_addcarryx_u32(&x261, &x262, x260, x236, x233);
  uint32_t x263 = (x262 + x234);
  uint32_t x264;
  fiat_p256_uint1 x265;
  fiat_p256_addcarryx_u32(&x264, &x265, 0x0, x216, x247);
  uint32_t x266;
  fiat_p256_uint1 x267;
  fiat_p256_addcarryx_u32(&x266, &x267, x265, x218, x249);
  uint32_t x268;
  fiat_p256_uint1 x269;
  fiat_p256_addcarryx_u32(&x268, &x269, x267, x220, x251);
  uint32_t x270;
  fiat_p256_uint1 x271;
  fiat_p256_addcarryx_u32(&x270, &x271, x269, x222, x253);
  uint32_t x272;
  fiat_p256_uint1 x273;
  fiat_p256_addcarryx_u32(&x272, &x273, x271, x224, x255);
  uint32_t x274;
  fiat_p256_uint1 x275;
  fiat_p256_addcarryx_u32(&x274, &x275, x273, x226, x257);
  uint32_t x276;
  fiat_p256_uint1 x277;
  fiat_p256_addcarryx_u32(&x276, &x277, x275, x228, x259);
  uint32_t x278;
  fiat_p256_uint1 x279;
  fiat_p256_addcarryx_u32(&x278, &x279, x277, x230, x261);
  uint32_t x280;
  fiat_p256_uint1 x281;
  fiat_p256_addcarryx_u32(&x280, &x281, x279, x232, x263);
  uint32_t x282;
  uint32_t x283;
  fiat_p256_mulx_u32(&x282, &x283, x264, UINT32_C(0xffffffff));
  uint32_t x284;
  uint32_t x285;
  fiat_p256_mulx_u32(&x284, &x285, x264, UINT32_C(0xffffffff));
  uint32_t x286;
  uint32_t x287;
  fiat_p256_mulx_u32(&x286, &x287, x264, UINT32_C(0xffffffff));
  uint32_t x288;
  uint32_t x289;
  fiat_p256_mulx_u32(&x288, &x289, x264, UINT32_C(0xffffffff));
  uint32_t x290;
  fiat_p256_uint1 x291;
  fiat_p256_addcarryx_u32(&x290, &x291, 0x0, x289, x286);
  uint32_t x292;
  fiat_p256_uint1 x293;
  fiat_p256_addcarryx_u32(&x292, &x293, x291, x287, x284);
  uint32_t x294 = (x293 + x285);
  uint32_t x295;
  fiat_p256_uint1 x296;
  fiat_p256_addcarryx_u32(&x295, &x296, 0x0, x264, x288);
  uint32_t x297;
  fiat_p256_uint1 x298;
  fiat_p256_addcarryx_u32(&x297, &x298, x296, x266, x290);
  uint32_t x299;
  fiat_p256_uint1 x300;
  fiat_p256_addcarryx_u32(&x299, &x300, x298, x268, x292);
  uint32_t x301;
  fiat_p256_uint1 x302;
  fiat_p256_addcarryx_u32(&x301, &x302, x300, x270, x294);
  uint32_t x303;
  fiat_p256_uint1 x304;
  fiat_p256_addcarryx_u32(&x303, &x304, x302, x272, 0x0);
  uint32_t x305;
  fiat_p256_uint1 x306;
  fiat_p256_addcarryx_u32(&x305, &x306, x304, x274, 0x0);
  uint32_t x307;
  fiat_p256_uint1 x308;
  fiat_p256_addcarryx_u32(&x307, &x308, x306, x276, x264);
  uint32_t x309;
  fiat_p256_uint1 x310;
  fiat_p256_addcarryx_u32(&x309, &x310, x308, x278, x282);
  uint32_t x311;
  fiat_p256_uint1 x312;
  fiat_p256_addcarryx_u32(&x311, &x312, x310, x280, x283);
  uint32_t x313 = ((uint32_t)x312 + x281);
  uint32_t x314;
  uint32_t x315;
  fiat_p256_mulx_u32(&x314, &x315, x4, (arg2[7]));
  uint32_t x316;
  uint32_t x317;
  fiat_p256_mulx_u32(&x316, &x317, x4, (arg2[6]));
  uint32_t x318;
  uint32_t x319;
  fiat_p256_mulx_u32(&x318, &x319, x4, (arg2[5]));
  uint32_t x320;
  uint32_t x321;
  fiat_p256_mulx_u32(&x320, &x321, x4, (arg2[4]));
  uint32_t x322;
  uint32_t x323;
  fiat_p256_mulx_u32(&x322, &x323, x4, (arg2[3]));
  uint32_t x324;
  uint32_t x325;
  fiat_p256_mulx_u32(&x324, &x325, x4, (arg2[2]));
  uint32_t x326;
  uint32_t x327;
  fiat_p256_mulx_u32(&x326, &x327, x4, (arg2[1]));
  uint32_t x328;
  uint32_t x329;
  fiat_p256_mulx_u32(&x328, &x329, x4, (arg2[0]));
  uint32_t x330;
  fiat_p256_uint1 x331;
  fiat_p256_addcarryx_u32(&x330, &x331, 0x0, x329, x326);
  uint32_t x332;
  fiat_p256_uint1 x333;
  fiat_p256_addcarryx_u32(&x332, &x333, x331, x327, x324);
  uint32_t x334;
  fiat_p256_uint1 x335;
  fiat_p256_addcarryx_u32(&x334, &x335, x333, x325, x322);
  uint32_t x336;
  fiat_p256_uint1 x337;
  fiat_p256_addcarryx_u32(&x336, &x337, x335, x323, x320);
  uint32_t x338;
  fiat_p256_uint1 x339;
  fiat_p256_addcarryx_u32(&x338, &x339, x337, x321, x318);
  uint32_t x340;
  fiat_p256_uint1 x341;
  fiat_p256_addcarryx_u32(&x340, &x341, x339, x319, x316);
  uint32_t x342;
  fiat_p256_uint1 x343;
  fiat_p256_addcarryx_u32(&x342, &x343, x341, x317, x314);
  uint32_t x344 = (x343 + x315);
  uint32_t x345;
  fiat_p256_uint1 x346;
  fiat_p256_addcarryx_u32(&x345, &x346, 0x0, x297, x328);
  uint32_t x347;
  fiat_p256_uint1 x348;
  fiat_p256_addcarryx_u32(&x347, &x348, x346, x299, x330);
  uint32_t x349;
  fiat_p256_uint1 x350;
  fiat_p256_addcarryx_u32(&x349, &x350, x348, x301, x332);
  uint32_t x351;
  fiat_p256_uint1 x352;
  fiat_p256_addcarryx_u32(&x351, &x352, x350, x303, x334);
  uint32_t x353;
  fiat_p256_uint1 x354;
  fiat_p256_addcarryx_u32(&x353, &x354, x352, x305, x336);
  uint32_t x355;
  fiat_p256_uint1 x356;
  fiat_p256_addcarryx_u32(&x355, &x356, x354, x307, x338);
  uint32_t x357;
  fiat_p256_uint1 x358;
  fiat_p256_addcarryx_u32(&x357, &x358, x356, x309, x340);
  uint32_t x359;
  fiat_p256_uint1 x360;
  fiat_p256_addcarryx_u32(&x359, &x360, x358, x311, x342);
  uint32_t x361;
  fiat_p256_uint1 x362;
  fiat_p256_addcarryx_u32(&x361, &x362, x360, x313, x344);
  uint32_t x363;
  uint32_t x364;
  fiat_p256_mulx_u32(&x363, &x364, x345, UINT32_C(0xffffffff));
  uint32_t x365;
  uint32_t x366;
  fiat_p256_mulx_u32(&x365, &x366, x345, UINT32_C(0xffffffff));
  uint32_t x367;
  uint32_t x368;
  fiat_p256_mulx_u32(&x367, &x368, x345, UINT32_C(0xffffffff));
  uint32_t x369;
  uint32_t x370;
  fiat_p256_mulx_u32(&x369, &x370, x345, UINT32_C(0xffffffff));
  uint32_t x371;
  fiat_p256_uint1 x372;
  fiat_p256_addcarryx_u32(&x371, &x372, 0x0, x370, x367);
  uint32_t x373;
  fiat_p256_uint1 x374;
  fiat_p256_addcarryx_u32(&x373, &x374, x372, x368, x365);
  uint32_t x375 = (x374 + x366);
  uint32_t x376;
  fiat_p256_uint1 x377;
  fiat_p256_addcarryx_u32(&x376, &x377, 0x0, x345, x369);
  uint32_t x378;
  fiat_p256_uint1 x379;
  fiat_p256_addcarryx_u32(&x378, &x379, x377, x347, x371);
  uint32_t x380;
  fiat_p256_uint1 x381;
  fiat_p256_addcarryx_u32(&x380, &x381, x379, x349, x373);
  uint32_t x382;
  fiat_p256_uint1 x383;
  fiat_p256_addcarryx_u32(&x382, &x383, x381, x351, x375);
  uint32_t x384;
  fiat_p256_uint1 x385;
  fiat_p256_addcarryx_u32(&x384, &x385, x383, x353, 0x0);
  uint32_t x386;
  fiat_p256_uint1 x387;
  fiat_p256_addcarryx_u32(&x386, &x387, x385, x355, 0x0);
  uint32_t x388;
  fiat_p256_uint1 x389;
  fiat_p256_addcarryx_u32(&x388, &x389, x387, x357, x345);
  uint32_t x390;
  fiat_p256_uint1 x391;
  fiat_p256_addcarryx_u32(&x390, &x391, x389, x359, x363);
  uint32_t x392;
  fiat_p256_uint1 x393;
  fiat_p256_addcarryx_u32(&x392, &x393, x391, x361, x364);
  uint32_t x394 = ((uint32_t)x393 + x362);
  uint32_t x395;
  uint32_t x396;
  fiat_p256_mulx_u32(&x395, &x396, x5, (arg2[7]));
  uint32_t x397;
  uint32_t x398;
  fiat_p256_mulx_u32(&x397, &x398, x5, (arg2[6]));
  uint32_t x399;
  uint32_t x400;
  fiat_p256_mulx_u32(&x399, &x400, x5, (arg2[5]));
  uint32_t x401;
  uint32_t x402;
  fiat_p256_mulx_u32(&x401, &x402, x5, (arg2[4]));
  uint32_t x403;
  uint32_t x404;
  fiat_p256_mulx_u32(&x403, &x404, x5, (arg2[3]));
  uint32_t x405;
  uint32_t x406;
  fiat_p256_mulx_u32(&x405, &x406, x5, (arg2[2]));
  uint32_t x407;
  uint32_t x408;
  fiat_p256_mulx_u32(&x407, &x408, x5, (arg2[1]));
  uint32_t x409;
  uint32_t x410;
  fiat_p256_mulx_u32(&x409, &x410, x5, (arg2[0]));
  uint32_t x411;
  fiat_p256_uint1 x412;
  fiat_p256_addcarryx_u32(&x411, &x412, 0x0, x410, x407);
  uint32_t x413;
  fiat_p256_uint1 x414;
  fiat_p256_addcarryx_u32(&x413, &x414, x412, x408, x405);
  uint32_t x415;
  fiat_p256_uint1 x416;
  fiat_p256_addcarryx_u32(&x415, &x416, x414, x406, x403);
  uint32_t x417;
  fiat_p256_uint1 x418;
  fiat_p256_addcarryx_u32(&x417, &x418, x416, x404, x401);
  uint32_t x419;
  fiat_p256_uint1 x420;
  fiat_p256_addcarryx_u32(&x419, &x420, x418, x402, x399);
  uint32_t x421;
  fiat_p256_uint1 x422;
  fiat_p256_addcarryx_u32(&x421, &x422, x420, x400, x397);
  uint32_t x423;
  fiat_p256_uint1 x424;
  fiat_p256_addcarryx_u32(&x423, &x424, x422, x398, x395);
  uint32_t x425 = (x424 + x396);
  uint32_t x426;
  fiat_p256_uint1 x427;
  fiat_p256_addcarryx_u32(&x426, &x427, 0x0, x378, x409);
  uint32_t x428;
  fiat_p256_uint1 x429;
  fiat_p256_addcarryx_u32(&x428, &x429, x427, x380, x411);
  uint32_t x430;
  fiat_p256_uint1 x431;
  fiat_p256_addcarryx_u32(&x430, &x431, x429, x382, x413);
  uint32_t x432;
  fiat_p256_uint1 x433;
  fiat_p256_addcarryx_u32(&x432, &x433, x431, x384, x415);
  uint32_t x434;
  fiat_p256_uint1 x435;
  fiat_p256_addcarryx_u32(&x434, &x435, x433, x386, x417);
  uint32_t x436;
  fiat_p256_uint1 x437;
  fiat_p256_addcarryx_u32(&x436, &x437, x435, x388, x419);
  uint32_t x438;
  fiat_p256_uint1 x439;
  fiat_p256_addcarryx_u32(&x438, &x439, x437, x390, x421);
  uint32_t x440;
  fiat_p256_uint1 x441;
  fiat_p256_addcarryx_u32(&x440, &x441, x439, x392, x423);
  uint32_t x442;
  fiat_p256_uint1 x443;
  fiat_p256_addcarryx_u32(&x442, &x443, x441, x394, x425);
  uint32_t x444;
  uint32_t x445;
  fiat_p256_mulx_u32(&x444, &x445, x426, UINT32_C(0xffffffff));
  uint32_t x446;
  uint32_t x447;
  fiat_p256_mulx_u32(&x446, &x447, x426, UINT32_C(0xffffffff));
  uint32_t x448;
  uint32_t x449;
  fiat_p256_mulx_u32(&x448, &x449, x426, UINT32_C(0xffffffff));
  uint32_t x450;
  uint32_t x451;
  fiat_p256_mulx_u32(&x450, &x451, x426, UINT32_C(0xffffffff));
  uint32_t x452;
  fiat_p256_uint1 x453;
  fiat_p256_addcarryx_u32(&x452, &x453, 0x0, x451, x448);
  uint32_t x454;
  fiat_p256_uint1 x455;
  fiat_p256_addcarryx_u32(&x454, &x455, x453, x449, x446);
  uint32_t x456 = (x455 + x447);
  uint32_t x457;
  fiat_p256_uint1 x458;
  fiat_p256_addcarryx_u32(&x457, &x458, 0x0, x426, x450);
  uint32_t x459;
  fiat_p256_uint1 x460;
  fiat_p256_addcarryx_u32(&x459, &x460, x458, x428, x452);
  uint32_t x461;
  fiat_p256_uint1 x462;
  fiat_p256_addcarryx_u32(&x461, &x462, x460, x430, x454);
  uint32_t x463;
  fiat_p256_uint1 x464;
  fiat_p256_addcarryx_u32(&x463, &x464, x462, x432, x456);
  uint32_t x465;
  fiat_p256_uint1 x466;
  fiat_p256_addcarryx_u32(&x465, &x466, x464, x434, 0x0);
  uint32_t x467;
  fiat_p256_uint1 x468;
  fiat_p256_addcarryx_u32(&x467, &x468, x466, x436, 0x0);
  uint32_t x469;
  fiat_p256_uint1 x470;
  fiat_p256_addcarryx_u32(&x469, &x470, x468, x438, x426);
  uint32_t x471;
  fiat_p256_uint1 x472;
  fiat_p256_addcarryx_u32(&x471, &x472, x470, x440, x444);
  uint32_t x473;
  fiat_p256_uint1 x474;
  fiat_p256_addcarryx_u32(&x473, &x474, x472, x442, x445);
  uint32_t x475 = ((uint32_t)x474 + x443);
  uint32_t x476;
  uint32_t x477;
  fiat_p256_mulx_u32(&x476, &x477, x6, (arg2[7]));
  uint32_t x478;
  uint32_t x479;
  fiat_p256_mulx_u32(&x478, &x479, x6, (arg2[6]));
  uint32_t x480;
  uint32_t x481;
  fiat_p256_mulx_u32(&x480, &x481, x6, (arg2[5]));
  uint32_t x482;
  uint32_t x483;
  fiat_p256_mulx_u32(&x482, &x483, x6, (arg2[4]));
  uint32_t x484;
  uint32_t x485;
  fiat_p256_mulx_u32(&x484, &x485, x6, (arg2[3]));
  uint32_t x486;
  uint32_t x487;
  fiat_p256_mulx_u32(&x486, &x487, x6, (arg2[2]));
  uint32_t x488;
  uint32_t x489;
  fiat_p256_mulx_u32(&x488, &x489, x6, (arg2[1]));
  uint32_t x490;
  uint32_t x491;
  fiat_p256_mulx_u32(&x490, &x491, x6, (arg2[0]));
  uint32_t x492;
  fiat_p256_uint1 x493;
  fiat_p256_addcarryx_u32(&x492, &x493, 0x0, x491, x488);
  uint32_t x494;
  fiat_p256_uint1 x495;
  fiat_p256_addcarryx_u32(&x494, &x495, x493, x489, x486);
  uint32_t x496;
  fiat_p256_uint1 x497;
  fiat_p256_addcarryx_u32(&x496, &x497, x495, x487, x484);
  uint32_t x498;
  fiat_p256_uint1 x499;
  fiat_p256_addcarryx_u32(&x498, &x499, x497, x485, x482);
  uint32_t x500;
  fiat_p256_uint1 x501;
  fiat_p256_addcarryx_u32(&x500, &x501, x499, x483, x480);
  uint32_t x502;
  fiat_p256_uint1 x503;
  fiat_p256_addcarryx_u32(&x502, &x503, x501, x481, x478);
  uint32_t x504;
  fiat_p256_uint1 x505;
  fiat_p256_addcarryx_u32(&x504, &x505, x503, x479, x476);
  uint32_t x506 = (x505 + x477);
  uint32_t x507;
  fiat_p256_uint1 x508;
  fiat_p256_addcarryx_u32(&x507, &x508, 0x0, x459, x490);
  uint32_t x509;
  fiat_p256_uint1 x510;
  fiat_p256_addcarryx_u32(&x509, &x510, x508, x461, x492);
  uint32_t x511;
  fiat_p256_uint1 x512;
  fiat_p256_addcarryx_u32(&x511, &x512, x510, x463, x494);
  uint32_t x513;
  fiat_p256_uint1 x514;
  fiat_p256_addcarryx_u32(&x513, &x514, x512, x465, x496);
  uint32_t x515;
  fiat_p256_uint1 x516;
  fiat_p256_addcarryx_u32(&x515, &x516, x514, x467, x498);
  uint32_t x517;
  fiat_p256_uint1 x518;
  fiat_p256_addcarryx_u32(&x517, &x518, x516, x469, x500);
  uint32_t x519;
  fiat_p256_uint1 x520;
  fiat_p256_addcarryx_u32(&x519, &x520, x518, x471, x502);
  uint32_t x521;
  fiat_p256_uint1 x522;
  fiat_p256_addcarryx_u32(&x521, &x522, x520, x473, x504);
  uint32_t x523;
  fiat_p256_uint1 x524;
  fiat_p256_addcarryx_u32(&x523, &x524, x522, x475, x506);
  uint32_t x525;
  uint32_t x526;
  fiat_p256_mulx_u32(&x525, &x526, x507, UINT32_C(0xffffffff));
  uint32_t x527;
  uint32_t x528;
  fiat_p256_mulx_u32(&x527, &x528, x507, UINT32_C(0xffffffff));
  uint32_t x529;
  uint32_t x530;
  fiat_p256_mulx_u32(&x529, &x530, x507, UINT32_C(0xffffffff));
  uint32_t x531;
  uint32_t x532;
  fiat_p256_mulx_u32(&x531, &x532, x507, UINT32_C(0xffffffff));
  uint32_t x533;
  fiat_p256_uint1 x534;
  fiat_p256_addcarryx_u32(&x533, &x534, 0x0, x532, x529);
  uint32_t x535;
  fiat_p256_uint1 x536;
  fiat_p256_addcarryx_u32(&x535, &x536, x534, x530, x527);
  uint32_t x537 = (x536 + x528);
  uint32_t x538;
  fiat_p256_uint1 x539;
  fiat_p256_addcarryx_u32(&x538, &x539, 0x0, x507, x531);
  uint32_t x540;
  fiat_p256_uint1 x541;
  fiat_p256_addcarryx_u32(&x540, &x541, x539, x509, x533);
  uint32_t x542;
  fiat_p256_uint1 x543;
  fiat_p256_addcarryx_u32(&x542, &x543, x541, x511, x535);
  uint32_t x544;
  fiat_p256_uint1 x545;
  fiat_p256_addcarryx_u32(&x544, &x545, x543, x513, x537);
  uint32_t x546;
  fiat_p256_uint1 x547;
  fiat_p256_addcarryx_u32(&x546, &x547, x545, x515, 0x0);
  uint32_t x548;
  fiat_p256_uint1 x549;
  fiat_p256_addcarryx_u32(&x548, &x549, x547, x517, 0x0);
  uint32_t x550;
  fiat_p256_uint1 x551;
  fiat_p256_addcarryx_u32(&x550, &x551, x549, x519, x507);
  uint32_t x552;
  fiat_p256_uint1 x553;
  fiat_p256_addcarryx_u32(&x552, &x553, x551, x521, x525);
  uint32_t x554;
  fiat_p256_uint1 x555;
  fiat_p256_addcarryx_u32(&x554, &x555, x553, x523, x526);
  uint32_t x556 = ((uint32_t)x555 + x524);
  uint32_t x557;
  uint32_t x558;
  fiat_p256_mulx_u32(&x557, &x558, x7, (arg2[7]));
  uint32_t x559;
  uint32_t x560;
  fiat_p256_mulx_u32(&x559, &x560, x7, (arg2[6]));
  uint32_t x561;
  uint32_t x562;
  fiat_p256_mulx_u32(&x561, &x562, x7, (arg2[5]));
  uint32_t x563;
  uint32_t x564;
  fiat_p256_mulx_u32(&x563, &x564, x7, (arg2[4]));
  uint32_t x565;
  uint32_t x566;
  fiat_p256_mulx_u32(&x565, &x566, x7, (arg2[3]));
  uint32_t x567;
  uint32_t x568;
  fiat_p256_mulx_u32(&x567, &x568, x7, (arg2[2]));
  uint32_t x569;
  uint32_t x570;
  fiat_p256_mulx_u32(&x569, &x570, x7, (arg2[1]));
  uint32_t x571;
  uint32_t x572;
  fiat_p256_mulx_u32(&x571, &x572, x7, (arg2[0]));
  uint32_t x573;
  fiat_p256_uint1 x574;
  fiat_p256_addcarryx_u32(&x573, &x574, 0x0, x572, x569);
  uint32_t x575;
  fiat_p256_uint1 x576;
  fiat_p256_addcarryx_u32(&x575, &x576, x574, x570, x567);
  uint32_t x577;
  fiat_p256_uint1 x578;
  fiat_p256_addcarryx_u32(&x577, &x578, x576, x568, x565);
  uint32_t x579;
  fiat_p256_uint1 x580;
  fiat_p256_addcarryx_u32(&x579, &x580, x578, x566, x563);
  uint32_t x581;
  fiat_p256_uint1 x582;
  fiat_p256_addcarryx_u32(&x581, &x582, x580, x564, x561);
  uint32_t x583;
  fiat_p256_uint1 x584;
  fiat_p256_addcarryx_u32(&x583, &x584, x582, x562, x559);
  uint32_t x585;
  fiat_p256_uint1 x586;
  fiat_p256_addcarryx_u32(&x585, &x586, x584, x560, x557);
  uint32_t x587 = (x586 + x558);
  uint32_t x588;
  fiat_p256_uint1 x589;
  fiat_p256_addcarryx_u32(&x588, &x589, 0x0, x540, x571);
  uint32_t x590;
  fiat_p256_uint1 x591;
  fiat_p256_addcarryx_u32(&x590, &x591, x589, x542, x573);
  uint32_t x592;
  fiat_p256_uint1 x593;
  fiat_p256_addcarryx_u32(&x592, &x593, x591, x544, x575);
  uint32_t x594;
  fiat_p256_uint1 x595;
  fiat_p256_addcarryx_u32(&x594, &x595, x593, x546, x577);
  uint32_t x596;
  fiat_p256_uint1 x597;
  fiat_p256_addcarryx_u32(&x596, &x597, x595, x548, x579);
  uint32_t x598;
  fiat_p256_uint1 x599;
  fiat_p256_addcarryx_u32(&x598, &x599, x597, x550, x581);
  uint32_t x600;
  fiat_p256_uint1 x601;
  fiat_p256_addcarryx_u32(&x600, &x601, x599, x552, x583);
  uint32_t x602;
  fiat_p256_uint1 x603;
  fiat_p256_addcarryx_u32(&x602, &x603, x601, x554, x585);
  uint32_t x604;
  fiat_p256_uint1 x605;
  fiat_p256_addcarryx_u32(&x604, &x605, x603, x556, x587);
  uint32_t x606;
  uint32_t x607;
  fiat_p256_mulx_u32(&x606, &x607, x588, UINT32_C(0xffffffff));
  uint32_t x608;
  uint32_t x609;
  fiat_p256_mulx_u32(&x608, &x609, x588, UINT32_C(0xffffffff));
  uint32_t x610;
  uint32_t x611;
  fiat_p256_mulx_u32(&x610, &x611, x588, UINT32_C(0xffffffff));
  uint32_t x612;
  uint32_t x613;
  fiat_p256_mulx_u32(&x612, &x613, x588, UINT32_C(0xffffffff));
  uint32_t x614;
  fiat_p256_uint1 x615;
  fiat_p256_addcarryx_u32(&x614, &x615, 0x0, x613, x610);
  uint32_t x616;
  fiat_p256_uint1 x617;
  fiat_p256_addcarryx_u32(&x616, &x617, x615, x611, x608);
  uint32_t x618 = (x617 + x609);
  uint32_t x619;
  fiat_p256_uint1 x620;
  fiat_p256_addcarryx_u32(&x619, &x620, 0x0, x588, x612);
  uint32_t x621;
  fiat_p256_uint1 x622;
  fiat_p256_addcarryx_u32(&x621, &x622, x620, x590, x614);
  uint32_t x623;
  fiat_p256_uint1 x624;
  fiat_p256_addcarryx_u32(&x623, &x624, x622, x592, x616);
  uint32_t x625;
  fiat_p256_uint1 x626;
  fiat_p256_addcarryx_u32(&x625, &x626, x624, x594, x618);
  uint32_t x627;
  fiat_p256_uint1 x628;
  fiat_p256_addcarryx_u32(&x627, &x628, x626, x596, 0x0);
  uint32_t x629;
  fiat_p256_uint1 x630;
  fiat_p256_addcarryx_u32(&x629, &x630, x628, x598, 0x0);
  uint32_t x631;
  fiat_p256_uint1 x632;
  fiat_p256_addcarryx_u32(&x631, &x632, x630, x600, x588);
  uint32_t x633;
  fiat_p256_uint1 x634;
  fiat_p256_addcarryx_u32(&x633, &x634, x632, x602, x606);
  uint32_t x635;
  fiat_p256_uint1 x636;
  fiat_p256_addcarryx_u32(&x635, &x636, x634, x604, x607);
  uint32_t x637 = ((uint32_t)x636 + x605);
  uint32_t x638;
  fiat_p256_uint1 x639;
  fiat_p256_subborrowx_u32(&x638, &x639, 0x0, x621, UINT32_C(0xffffffff));
  uint32_t x640;
  fiat_p256_uint1 x641;
  fiat_p256_subborrowx_u32(&x640, &x641, x639, x623, UINT32_C(0xffffffff));
  uint32_t x642;
  fiat_p256_uint1 x643;
  fiat_p256_subborrowx_u32(&x642, &x643, x641, x625, UINT32_C(0xffffffff));
  uint32_t x644;
  fiat_p256_uint1 x645;
  fiat_p256_subborrowx_u32(&x644, &x645, x643, x627, 0x0);
  uint32_t x646;
  fiat_p256_uint1 x647;
  fiat_p256_subborrowx_u32(&x646, &x647, x645, x629, 0x0);
  uint32_t x648;
  fiat_p256_uint1 x649;
  fiat_p256_subborrowx_u32(&x648, &x649, x647, x631, 0x0);
  uint32_t x650;
  fiat_p256_uint1 x651;
  fiat_p256_subborrowx_u32(&x650, &x651, x649, x633, 0x1);
  uint32_t x652;
  fiat_p256_uint1 x653;
  fiat_p256_subborrowx_u32(&x652, &x653, x651, x635, UINT32_C(0xffffffff));
  uint32_t x654;
  fiat_p256_uint1 x655;
  fiat_p256_subborrowx_u32(&x654, &x655, x653, x637, 0x0);
  uint32_t x656;
  fiat_p256_cmovznz_u32(&x656, x655, x638, x621);
  uint32_t x657;
  fiat_p256_cmovznz_u32(&x657, x655, x640, x623);
  uint32_t x658;
  fiat_p256_cmovznz_u32(&x658, x655, x642, x625);
  uint32_t x659;
  fiat_p256_cmovznz_u32(&x659, x655, x644, x627);
  uint32_t x660;
  fiat_p256_cmovznz_u32(&x660, x655, x646, x629);
  uint32_t x661;
  fiat_p256_cmovznz_u32(&x661, x655, x648, x631);
  uint32_t x662;
  fiat_p256_cmovznz_u32(&x662, x655, x650, x633);
  uint32_t x663;
  fiat_p256_cmovznz_u32(&x663, x655, x652, x635);
  out1[0] = x656;
  out1[1] = x657;
  out1[2] = x658;
  out1[3] = x659;
  out1[4] = x660;
  out1[5] = x661;
  out1[6] = x662;
  out1[7] = x663;
}

/*
 * The function fiat_p256_square squares a field element in the Montgomery domain.
 * Preconditions:
 *   0 ≤ eval arg1 < m
 * Postconditions:
 *   eval (from_montgomery out1) mod m = (eval (from_montgomery arg1) * eval (from_montgomery arg1)) mod m
 *   0 ≤ eval out1 < m
 *
 * Input Bounds:
 *   arg1: [[0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff]]
 * Output Bounds:
 *   out1: [[0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff]]
 */
static void fiat_p256_square(uint32_t out1[8], const uint32_t arg1[8]) {
  uint32_t x1 = (arg1[1]);
  uint32_t x2 = (arg1[2]);
  uint32_t x3 = (arg1[3]);
  uint32_t x4 = (arg1[4]);
  uint32_t x5 = (arg1[5]);
  uint32_t x6 = (arg1[6]);
  uint32_t x7 = (arg1[7]);
  uint32_t x8 = (arg1[0]);
  uint32_t x9;
  uint32_t x10;
  fiat_p256_mulx_u32(&x9, &x10, x8, (arg1[7]));
  uint32_t x11;
  uint32_t x12;
  fiat_p256_mulx_u32(&x11, &x12, x8, (arg1[6]));
  uint32_t x13;
  uint32_t x14;
  fiat_p256_mulx_u32(&x13, &x14, x8, (arg1[5]));
  uint32_t x15;
  uint32_t x16;
  fiat_p256_mulx_u32(&x15, &x16, x8, (arg1[4]));
  uint32_t x17;
  uint32_t x18;
  fiat_p256_mulx_u32(&x17, &x18, x8, (arg1[3]));
  uint32_t x19;
  uint32_t x20;
  fiat_p256_mulx_u32(&x19, &x20, x8, (arg1[2]));
  uint32_t x21;
  uint32_t x22;
  fiat_p256_mulx_u32(&x21, &x22, x8, (arg1[1]));
  uint32_t x23;
  uint32_t x24;
  fiat_p256_mulx_u32(&x23, &x24, x8, (arg1[0]));
  uint32_t x25;
  fiat_p256_uint1 x26;
  fiat_p256_addcarryx_u32(&x25, &x26, 0x0, x24, x21);
  uint32_t x27;
  fiat_p256_uint1 x28;
  fiat_p256_addcarryx_u32(&x27, &x28, x26, x22, x19);
  uint32_t x29;
  fiat_p256_uint1 x30;
  fiat_p256_addcarryx_u32(&x29, &x30, x28, x20, x17);
  uint32_t x31;
  fiat_p256_uint1 x32;
  fiat_p256_addcarryx_u32(&x31, &x32, x30, x18, x15);
  uint32_t x33;
  fiat_p256_uint1 x34;
  fiat_p256_addcarryx_u32(&x33, &x34, x32, x16, x13);
  uint32_t x35;
  fiat_p256_uint1 x36;
  fiat_p256_addcarryx_u32(&x35, &x36, x34, x14, x11);
  uint32_t x37;
  fiat_p256_uint1 x38;
  fiat_p256_addcarryx_u32(&x37, &x38, x36, x12, x9);
  uint32_t x39 = (x38 + x10);
  uint32_t x40;
  uint32_t x41;
  fiat_p256_mulx_u32(&x40, &x41, x23, UINT32_C(0xffffffff));
  uint32_t x42;
  uint32_t x43;
  fiat_p256_mulx_u32(&x42, &x43, x23, UINT32_C(0xffffffff));
  uint32_t x44;
  uint32_t x45;
  fiat_p256_mulx_u32(&x44, &x45, x23, UINT32_C(0xffffffff));
  uint32_t x46;
  uint32_t x47;
  fiat_p256_mulx_u32(&x46, &x47, x23, UINT32_C(0xffffffff));
  uint32_t x48;
  fiat_p256_uint1 x49;
  fiat_p256_addcarryx_u32(&x48, &x49, 0x0, x47, x44);
  uint32_t x50;
  fiat_p256_uint1 x51;
  fiat_p256_addcarryx_u32(&x50, &x51, x49, x45, x42);
  uint32_t x52 = (x51 + x43);
  uint32_t x53;
  fiat_p256_uint1 x54;
  fiat_p256_addcarryx_u32(&x53, &x54, 0x0, x23, x46);
  uint32_t x55;
  fiat_p256_uint1 x56;
  fiat_p256_addcarryx_u32(&x55, &x56, x54, x25, x48);
  uint32_t x57;
  fiat_p256_uint1 x58;
  fiat_p256_addcarryx_u32(&x57, &x58, x56, x27, x50);
  uint32_t x59;
  fiat_p256_uint1 x60;
  fiat_p256_addcarryx_u32(&x59, &x60, x58, x29, x52);
  uint32_t x61;
  fiat_p256_uint1 x62;
  fiat_p256_addcarryx_u32(&x61, &x62, x60, x31, 0x0);
  uint32_t x63;
  fiat_p256_uint1 x64;
  fiat_p256_addcarryx_u32(&x63, &x64, x62, x33, 0x0);
  uint32_t x65;
  fiat_p256_uint1 x66;
  fiat_p256_addcarryx_u32(&x65, &x66, x64, x35, x23);
  uint32_t x67;
  fiat_p256_uint1 x68;
  fiat_p256_addcarryx_u32(&x67, &x68, x66, x37, x40);
  uint32_t x69;
  fiat_p256_uint1 x70;
  fiat_p256_addcarryx_u32(&x69, &x70, x68, x39, x41);
  uint32_t x71;
  uint32_t x72;
  fiat_p256_mulx_u32(&x71, &x72, x1, (arg1[7]));
  uint32_t x73;
  uint32_t x74;
  fiat_p256_mulx_u32(&x73, &x74, x1, (arg1[6]));
  uint32_t x75;
  uint32_t x76;
  fiat_p256_mulx_u32(&x75, &x76, x1, (arg1[5]));
  uint32_t x77;
  uint32_t x78;
  fiat_p256_mulx_u32(&x77, &x78, x1, (arg1[4]));
  uint32_t x79;
  uint32_t x80;
  fiat_p256_mulx_u32(&x79, &x80, x1, (arg1[3]));
  uint32_t x81;
  uint32_t x82;
  fiat_p256_mulx_u32(&x81, &x82, x1, (arg1[2]));
  uint32_t x83;
  uint32_t x84;
  fiat_p256_mulx_u32(&x83, &x84, x1, (arg1[1]));
  uint32_t x85;
  uint32_t x86;
  fiat_p256_mulx_u32(&x85, &x86, x1, (arg1[0]));
  uint32_t x87;
  fiat_p256_uint1 x88;
  fiat_p256_addcarryx_u32(&x87, &x88, 0x0, x86, x83);
  uint32_t x89;
  fiat_p256_uint1 x90;
  fiat_p256_addcarryx_u32(&x89, &x90, x88, x84, x81);
  uint32_t x91;
  fiat_p256_uint1 x92;
  fiat_p256_addcarryx_u32(&x91, &x92, x90, x82, x79);
  uint32_t x93;
  fiat_p256_uint1 x94;
  fiat_p256_addcarryx_u32(&x93, &x94, x92, x80, x77);
  uint32_t x95;
  fiat_p256_uint1 x96;
  fiat_p256_addcarryx_u32(&x95, &x96, x94, x78, x75);
  uint32_t x97;
  fiat_p256_uint1 x98;
  fiat_p256_addcarryx_u32(&x97, &x98, x96, x76, x73);
  uint32_t x99;
  fiat_p256_uint1 x100;
  fiat_p256_addcarryx_u32(&x99, &x100, x98, x74, x71);
  uint32_t x101 = (x100 + x72);
  uint32_t x102;
  fiat_p256_uint1 x103;
  fiat_p256_addcarryx_u32(&x102, &x103, 0x0, x55, x85);
  uint32_t x104;
  fiat_p256_uint1 x105;
  fiat_p256_addcarryx_u32(&x104, &x105, x103, x57, x87);
  uint32_t x106;
  fiat_p256_uint1 x107;
  fiat_p256_addcarryx_u32(&x106, &x107, x105, x59, x89);
  uint32_t x108;
  fiat_p256_uint1 x109;
  fiat_p256_addcarryx_u32(&x108, &x109, x107, x61, x91);
  uint32_t x110;
  fiat_p256_uint1 x111;
  fiat_p256_addcarryx_u32(&x110, &x111, x109, x63, x93);
  uint32_t x112;
  fiat_p256_uint1 x113;
  fiat_p256_addcarryx_u32(&x112, &x113, x111, x65, x95);
  uint32_t x114;
  fiat_p256_uint1 x115;
  fiat_p256_addcarryx_u32(&x114, &x115, x113, x67, x97);
  uint32_t x116;
  fiat_p256_uint1 x117;
  fiat_p256_addcarryx_u32(&x116, &x117, x115, x69, x99);
  uint32_t x118;
  fiat_p256_uint1 x119;
  fiat_p256_addcarryx_u32(&x118, &x119, x117, x70, x101);
  uint32_t x120;
  uint32_t x121;
  fiat_p256_mulx_u32(&x120, &x121, x102, UINT32_C(0xffffffff));
  uint32_t x122;
  uint32_t x123;
  fiat_p256_mulx_u32(&x122, &x123, x102, UINT32_C(0xffffffff));
  uint32_t x124;
  uint32_t x125;
  fiat_p256_mulx_u32(&x124, &x125, x102, UINT32_C(0xffffffff));
  uint32_t x126;
  uint32_t x127;
  fiat_p256_mulx_u32(&x126, &x127, x102, UINT32_C(0xffffffff));
  uint32_t x128;
  fiat_p256_uint1 x129;
  fiat_p256_addcarryx_u32(&x128, &x129, 0x0, x127, x124);
  uint32_t x130;
  fiat_p256_uint1 x131;
  fiat_p256_addcarryx_u32(&x130, &x131, x129, x125, x122);
  uint32_t x132 = (x131 + x123);
  uint32_t x133;
  fiat_p256_uint1 x134;
  fiat_p256_addcarryx_u32(&x133, &x134, 0x0, x102, x126);
  uint32_t x135;
  fiat_p256_uint1 x136;
  fiat_p256_addcarryx_u32(&x135, &x136, x134, x104, x128);
  uint32_t x137;
  fiat_p256_uint1 x138;
  fiat_p256_addcarryx_u32(&x137, &x138, x136, x106, x130);
  uint32_t x139;
  fiat_p256_uint1 x140;
  fiat_p256_addcarryx_u32(&x139, &x140, x138, x108, x132);
  uint32_t x141;
  fiat_p256_uint1 x142;
  fiat_p256_addcarryx_u32(&x141, &x142, x140, x110, 0x0);
  uint32_t x143;
  fiat_p256_uint1 x144;
  fiat_p256_addcarryx_u32(&x143, &x144, x142, x112, 0x0);
  uint32_t x145;
  fiat_p256_uint1 x146;
  fiat_p256_addcarryx_u32(&x145, &x146, x144, x114, x102);
  uint32_t x147;
  fiat_p256_uint1 x148;
  fiat_p256_addcarryx_u32(&x147, &x148, x146, x116, x120);
  uint32_t x149;
  fiat_p256_uint1 x150;
  fiat_p256_addcarryx_u32(&x149, &x150, x148, x118, x121);
  uint32_t x151 = ((uint32_t)x150 + x119);
  uint32_t x152;
  uint32_t x153;
  fiat_p256_mulx_u32(&x152, &x153, x2, (arg1[7]));
  uint32_t x154;
  uint32_t x155;
  fiat_p256_mulx_u32(&x154, &x155, x2, (arg1[6]));
  uint32_t x156;
  uint32_t x157;
  fiat_p256_mulx_u32(&x156, &x157, x2, (arg1[5]));
  uint32_t x158;
  uint32_t x159;
  fiat_p256_mulx_u32(&x158, &x159, x2, (arg1[4]));
  uint32_t x160;
  uint32_t x161;
  fiat_p256_mulx_u32(&x160, &x161, x2, (arg1[3]));
  uint32_t x162;
  uint32_t x163;
  fiat_p256_mulx_u32(&x162, &x163, x2, (arg1[2]));
  uint32_t x164;
  uint32_t x165;
  fiat_p256_mulx_u32(&x164, &x165, x2, (arg1[1]));
  uint32_t x166;
  uint32_t x167;
  fiat_p256_mulx_u32(&x166, &x167, x2, (arg1[0]));
  uint32_t x168;
  fiat_p256_uint1 x169;
  fiat_p256_addcarryx_u32(&x168, &x169, 0x0, x167, x164);
  uint32_t x170;
  fiat_p256_uint1 x171;
  fiat_p256_addcarryx_u32(&x170, &x171, x169, x165, x162);
  uint32_t x172;
  fiat_p256_uint1 x173;
  fiat_p256_addcarryx_u32(&x172, &x173, x171, x163, x160);
  uint32_t x174;
  fiat_p256_uint1 x175;
  fiat_p256_addcarryx_u32(&x174, &x175, x173, x161, x158);
  uint32_t x176;
  fiat_p256_uint1 x177;
  fiat_p256_addcarryx_u32(&x176, &x177, x175, x159, x156);
  uint32_t x178;
  fiat_p256_uint1 x179;
  fiat_p256_addcarryx_u32(&x178, &x179, x177, x157, x154);
  uint32_t x180;
  fiat_p256_uint1 x181;
  fiat_p256_addcarryx_u32(&x180, &x181, x179, x155, x152);
  uint32_t x182 = (x181 + x153);
  uint32_t x183;
  fiat_p256_uint1 x184;
  fiat_p256_addcarryx_u32(&x183, &x184, 0x0, x135, x166);
  uint32_t x185;
  fiat_p256_uint1 x186;
  fiat_p256_addcarryx_u32(&x185, &x186, x184, x137, x168);
  uint32_t x187;
  fiat_p256_uint1 x188;
  fiat_p256_addcarryx_u32(&x187, &x188, x186, x139, x170);
  uint32_t x189;
  fiat_p256_uint1 x190;
  fiat_p256_addcarryx_u32(&x189, &x190, x188, x141, x172);
  uint32_t x191;
  fiat_p256_uint1 x192;
  fiat_p256_addcarryx_u32(&x191, &x192, x190, x143, x174);
  uint32_t x193;
  fiat_p256_uint1 x194;
  fiat_p256_addcarryx_u32(&x193, &x194, x192, x145, x176);
  uint32_t x195;
  fiat_p256_uint1 x196;
  fiat_p256_addcarryx_u32(&x195, &x196, x194, x147, x178);
  uint32_t x197;
  fiat_p256_uint1 x198;
  fiat_p256_addcarryx_u32(&x197, &x198, x196, x149, x180);
  uint32_t x199;
  fiat_p256_uint1 x200;
  fiat_p256_addcarryx_u32(&x199, &x200, x198, x151, x182);
  uint32_t x201;
  uint32_t x202;
  fiat_p256_mulx_u32(&x201, &x202, x183, UINT32_C(0xffffffff));
  uint32_t x203;
  uint32_t x204;
  fiat_p256_mulx_u32(&x203, &x204, x183, UINT32_C(0xffffffff));
  uint32_t x205;
  uint32_t x206;
  fiat_p256_mulx_u32(&x205, &x206, x183, UINT32_C(0xffffffff));
  uint32_t x207;
  uint32_t x208;
  fiat_p256_mulx_u32(&x207, &x208, x183, UINT32_C(0xffffffff));
  uint32_t x209;
  fiat_p256_uint1 x210;
  fiat_p256_addcarryx_u32(&x209, &x210, 0x0, x208, x205);
  uint32_t x211;
  fiat_p256_uint1 x212;
  fiat_p256_addcarryx_u32(&x211, &x212, x210, x206, x203);
  uint32_t x213 = (x212 + x204);
  uint32_t x214;
  fiat_p256_uint1 x215;
  fiat_p256_addcarryx_u32(&x214, &x215, 0x0, x183, x207);
  uint32_t x216;
  fiat_p256_uint1 x217;
  fiat_p256_addcarryx_u32(&x216, &x217, x215, x185, x209);
  uint32_t x218;
  fiat_p256_uint1 x219;
  fiat_p256_addcarryx_u32(&x218, &x219, x217, x187, x211);
  uint32_t x220;
  fiat_p256_uint1 x221;
  fiat_p256_addcarryx_u32(&x220, &x221, x219, x189, x213);
  uint32_t x222;
  fiat_p256_uint1 x223;
  fiat_p256_addcarryx_u32(&x222, &x223, x221, x191, 0x0);
  uint32_t x224;
  fiat_p256_uint1 x225;
  fiat_p256_addcarryx_u32(&x224, &x225, x223, x193, 0x0);
  uint32_t x226;
  fiat_p256_uint1 x227;
  fiat_p256_addcarryx_u32(&x226, &x227, x225, x195, x183);
  uint32_t x228;
  fiat_p256_uint1 x229;
  fiat_p256_addcarryx_u32(&x228, &x229, x227, x197, x201);
  uint32_t x230;
  fiat_p256_uint1 x231;
  fiat_p256_addcarryx_u32(&x230, &x231, x229, x199, x202);
  uint32_t x232 = ((uint32_t)x231 + x200);
  uint32_t x233;
  uint32_t x234;
  fiat_p256_mulx_u32(&x233, &x234, x3, (arg1[7]));
  uint32_t x235;
  uint32_t x236;
  fiat_p256_mulx_u32(&x235, &x236, x3, (arg1[6]));
  uint32_t x237;
  uint32_t x238;
  fiat_p256_mulx_u32(&x237, &x238, x3, (arg1[5]));
  uint32_t x239;
  uint32_t x240;
  fiat_p256_mulx_u32(&x239, &x240, x3, (arg1[4]));
  uint32_t x241;
  uint32_t x242;
  fiat_p256_mulx_u32(&x241, &x242, x3, (arg1[3]));
  uint32_t x243;
  uint32_t x244;
  fiat_p256_mulx_u32(&x243, &x244, x3, (arg1[2]));
  uint32_t x245;
  uint32_t x246;
  fiat_p256_mulx_u32(&x245, &x246, x3, (arg1[1]));
  uint32_t x247;
  uint32_t x248;
  fiat_p256_mulx_u32(&x247, &x248, x3, (arg1[0]));
  uint32_t x249;
  fiat_p256_uint1 x250;
  fiat_p256_addcarryx_u32(&x249, &x250, 0x0, x248, x245);
  uint32_t x251;
  fiat_p256_uint1 x252;
  fiat_p256_addcarryx_u32(&x251, &x252, x250, x246, x243);
  uint32_t x253;
  fiat_p256_uint1 x254;
  fiat_p256_addcarryx_u32(&x253, &x254, x252, x244, x241);
  uint32_t x255;
  fiat_p256_uint1 x256;
  fiat_p256_addcarryx_u32(&x255, &x256, x254, x242, x239);
  uint32_t x257;
  fiat_p256_uint1 x258;
  fiat_p256_addcarryx_u32(&x257, &x258, x256, x240, x237);
  uint32_t x259;
  fiat_p256_uint1 x260;
  fiat_p256_addcarryx_u32(&x259, &x260, x258, x238, x235);
  uint32_t x261;
  fiat_p256_uint1 x262;
  fiat_p256_addcarryx_u32(&x261, &x262, x260, x236, x233);
  uint32_t x263 = (x262 + x234);
  uint32_t x264;
  fiat_p256_uint1 x265;
  fiat_p256_addcarryx_u32(&x264, &x265, 0x0, x216, x247);
  uint32_t x266;
  fiat_p256_uint1 x267;
  fiat_p256_addcarryx_u32(&x266, &x267, x265, x218, x249);
  uint32_t x268;
  fiat_p256_uint1 x269;
  fiat_p256_addcarryx_u32(&x268, &x269, x267, x220, x251);
  uint32_t x270;
  fiat_p256_uint1 x271;
  fiat_p256_addcarryx_u32(&x270, &x271, x269, x222, x253);
  uint32_t x272;
  fiat_p256_uint1 x273;
  fiat_p256_addcarryx_u32(&x272, &x273, x271, x224, x255);
  uint32_t x274;
  fiat_p256_uint1 x275;
  fiat_p256_addcarryx_u32(&x274, &x275, x273, x226, x257);
  uint32_t x276;
  fiat_p256_uint1 x277;
  fiat_p256_addcarryx_u32(&x276, &x277, x275, x228, x259);
  uint32_t x278;
  fiat_p256_uint1 x279;
  fiat_p256_addcarryx_u32(&x278, &x279, x277, x230, x261);
  uint32_t x280;
  fiat_p256_uint1 x281;
  fiat_p256_addcarryx_u32(&x280, &x281, x279, x232, x263);
  uint32_t x282;
  uint32_t x283;
  fiat_p256_mulx_u32(&x282, &x283, x264, UINT32_C(0xffffffff));
  uint32_t x284;
  uint32_t x285;
  fiat_p256_mulx_u32(&x284, &x285, x264, UINT32_C(0xffffffff));
  uint32_t x286;
  uint32_t x287;
  fiat_p256_mulx_u32(&x286, &x287, x264, UINT32_C(0xffffffff));
  uint32_t x288;
  uint32_t x289;
  fiat_p256_mulx_u32(&x288, &x289, x264, UINT32_C(0xffffffff));
  uint32_t x290;
  fiat_p256_uint1 x291;
  fiat_p256_addcarryx_u32(&x290, &x291, 0x0, x289, x286);
  uint32_t x292;
  fiat_p256_uint1 x293;
  fiat_p256_addcarryx_u32(&x292, &x293, x291, x287, x284);
  uint32_t x294 = (x293 + x285);
  uint32_t x295;
  fiat_p256_uint1 x296;
  fiat_p256_addcarryx_u32(&x295, &x296, 0x0, x264, x288);
  uint32_t x297;
  fiat_p256_uint1 x298;
  fiat_p256_addcarryx_u32(&x297, &x298, x296, x266, x290);
  uint32_t x299;
  fiat_p256_uint1 x300;
  fiat_p256_addcarryx_u32(&x299, &x300, x298, x268, x292);
  uint32_t x301;
  fiat_p256_uint1 x302;
  fiat_p256_addcarryx_u32(&x301, &x302, x300, x270, x294);
  uint32_t x303;
  fiat_p256_uint1 x304;
  fiat_p256_addcarryx_u32(&x303, &x304, x302, x272, 0x0);
  uint32_t x305;
  fiat_p256_uint1 x306;
  fiat_p256_addcarryx_u32(&x305, &x306, x304, x274, 0x0);
  uint32_t x307;
  fiat_p256_uint1 x308;
  fiat_p256_addcarryx_u32(&x307, &x308, x306, x276, x264);
  uint32_t x309;
  fiat_p256_uint1 x310;
  fiat_p256_addcarryx_u32(&x309, &x310, x308, x278, x282);
  uint32_t x311;
  fiat_p256_uint1 x312;
  fiat_p256_addcarryx_u32(&x311, &x312, x310, x280, x283);
  uint32_t x313 = ((uint32_t)x312 + x281);
  uint32_t x314;
  uint32_t x315;
  fiat_p256_mulx_u32(&x314, &x315, x4, (arg1[7]));
  uint32_t x316;
  uint32_t x317;
  fiat_p256_mulx_u32(&x316, &x317, x4, (arg1[6]));
  uint32_t x318;
  uint32_t x319;
  fiat_p256_mulx_u32(&x318, &x319, x4, (arg1[5]));
  uint32_t x320;
  uint32_t x321;
  fiat_p256_mulx_u32(&x320, &x321, x4, (arg1[4]));
  uint32_t x322;
  uint32_t x323;
  fiat_p256_mulx_u32(&x322, &x323, x4, (arg1[3]));
  uint32_t x324;
  uint32_t x325;
  fiat_p256_mulx_u32(&x324, &x325, x4, (arg1[2]));
  uint32_t x326;
  uint32_t x327;
  fiat_p256_mulx_u32(&x326, &x327, x4, (arg1[1]));
  uint32_t x328;
  uint32_t x329;
  fiat_p256_mulx_u32(&x328, &x329, x4, (arg1[0]));
  uint32_t x330;
  fiat_p256_uint1 x331;
  fiat_p256_addcarryx_u32(&x330, &x331, 0x0, x329, x326);
  uint32_t x332;
  fiat_p256_uint1 x333;
  fiat_p256_addcarryx_u32(&x332, &x333, x331, x327, x324);
  uint32_t x334;
  fiat_p256_uint1 x335;
  fiat_p256_addcarryx_u32(&x334, &x335, x333, x325, x322);
  uint32_t x336;
  fiat_p256_uint1 x337;
  fiat_p256_addcarryx_u32(&x336, &x337, x335, x323, x320);
  uint32_t x338;
  fiat_p256_uint1 x339;
  fiat_p256_addcarryx_u32(&x338, &x339, x337, x321, x318);
  uint32_t x340;
  fiat_p256_uint1 x341;
  fiat_p256_addcarryx_u32(&x340, &x341, x339, x319, x316);
  uint32_t x342;
  fiat_p256_uint1 x343;
  fiat_p256_addcarryx_u32(&x342, &x343, x341, x317, x314);
  uint32_t x344 = (x343 + x315);
  uint32_t x345;
  fiat_p256_uint1 x346;
  fiat_p256_addcarryx_u32(&x345, &x346, 0x0, x297, x328);
  uint32_t x347;
  fiat_p256_uint1 x348;
  fiat_p256_addcarryx_u32(&x347, &x348, x346, x299, x330);
  uint32_t x349;
  fiat_p256_uint1 x350;
  fiat_p256_addcarryx_u32(&x349, &x350, x348, x301, x332);
  uint32_t x351;
  fiat_p256_uint1 x352;
  fiat_p256_addcarryx_u32(&x351, &x352, x350, x303, x334);
  uint32_t x353;
  fiat_p256_uint1 x354;
  fiat_p256_addcarryx_u32(&x353, &x354, x352, x305, x336);
  uint32_t x355;
  fiat_p256_uint1 x356;
  fiat_p256_addcarryx_u32(&x355, &x356, x354, x307, x338);
  uint32_t x357;
  fiat_p256_uint1 x358;
  fiat_p256_addcarryx_u32(&x357, &x358, x356, x309, x340);
  uint32_t x359;
  fiat_p256_uint1 x360;
  fiat_p256_addcarryx_u32(&x359, &x360, x358, x311, x342);
  uint32_t x361;
  fiat_p256_uint1 x362;
  fiat_p256_addcarryx_u32(&x361, &x362, x360, x313, x344);
  uint32_t x363;
  uint32_t x364;
  fiat_p256_mulx_u32(&x363, &x364, x345, UINT32_C(0xffffffff));
  uint32_t x365;
  uint32_t x366;
  fiat_p256_mulx_u32(&x365, &x366, x345, UINT32_C(0xffffffff));
  uint32_t x367;
  uint32_t x368;
  fiat_p256_mulx_u32(&x367, &x368, x345, UINT32_C(0xffffffff));
  uint32_t x369;
  uint32_t x370;
  fiat_p256_mulx_u32(&x369, &x370, x345, UINT32_C(0xffffffff));
  uint32_t x371;
  fiat_p256_uint1 x372;
  fiat_p256_addcarryx_u32(&x371, &x372, 0x0, x370, x367);
  uint32_t x373;
  fiat_p256_uint1 x374;
  fiat_p256_addcarryx_u32(&x373, &x374, x372, x368, x365);
  uint32_t x375 = (x374 + x366);
  uint32_t x376;
  fiat_p256_uint1 x377;
  fiat_p256_addcarryx_u32(&x376, &x377, 0x0, x345, x369);
  uint32_t x378;
  fiat_p256_uint1 x379;
  fiat_p256_addcarryx_u32(&x378, &x379, x377, x347, x371);
  uint32_t x380;
  fiat_p256_uint1 x381;
  fiat_p256_addcarryx_u32(&x380, &x381, x379, x349, x373);
  uint32_t x382;
  fiat_p256_uint1 x383;
  fiat_p256_addcarryx_u32(&x382, &x383, x381, x351, x375);
  uint32_t x384;
  fiat_p256_uint1 x385;
  fiat_p256_addcarryx_u32(&x384, &x385, x383, x353, 0x0);
  uint32_t x386;
  fiat_p256_uint1 x387;
  fiat_p256_addcarryx_u32(&x386, &x387, x385, x355, 0x0);
  uint32_t x388;
  fiat_p256_uint1 x389;
  fiat_p256_addcarryx_u32(&x388, &x389, x387, x357, x345);
  uint32_t x390;
  fiat_p256_uint1 x391;
  fiat_p256_addcarryx_u32(&x390, &x391, x389, x359, x363);
  uint32_t x392;
  fiat_p256_uint1 x393;
  fiat_p256_addcarryx_u32(&x392, &x393, x391, x361, x364);
  uint32_t x394 = ((uint32_t)x393 + x362);
  uint32_t x395;
  uint32_t x396;
  fiat_p256_mulx_u32(&x395, &x396, x5, (arg1[7]));
  uint32_t x397;
  uint32_t x398;
  fiat_p256_mulx_u32(&x397, &x398, x5, (arg1[6]));
  uint32_t x399;
  uint32_t x400;
  fiat_p256_mulx_u32(&x399, &x400, x5, (arg1[5]));
  uint32_t x401;
  uint32_t x402;
  fiat_p256_mulx_u32(&x401, &x402, x5, (arg1[4]));
  uint32_t x403;
  uint32_t x404;
  fiat_p256_mulx_u32(&x403, &x404, x5, (arg1[3]));
  uint32_t x405;
  uint32_t x406;
  fiat_p256_mulx_u32(&x405, &x406, x5, (arg1[2]));
  uint32_t x407;
  uint32_t x408;
  fiat_p256_mulx_u32(&x407, &x408, x5, (arg1[1]));
  uint32_t x409;
  uint32_t x410;
  fiat_p256_mulx_u32(&x409, &x410, x5, (arg1[0]));
  uint32_t x411;
  fiat_p256_uint1 x412;
  fiat_p256_addcarryx_u32(&x411, &x412, 0x0, x410, x407);
  uint32_t x413;
  fiat_p256_uint1 x414;
  fiat_p256_addcarryx_u32(&x413, &x414, x412, x408, x405);
  uint32_t x415;
  fiat_p256_uint1 x416;
  fiat_p256_addcarryx_u32(&x415, &x416, x414, x406, x403);
  uint32_t x417;
  fiat_p256_uint1 x418;
  fiat_p256_addcarryx_u32(&x417, &x418, x416, x404, x401);
  uint32_t x419;
  fiat_p256_uint1 x420;
  fiat_p256_addcarryx_u32(&x419, &x420, x418, x402, x399);
  uint32_t x421;
  fiat_p256_uint1 x422;
  fiat_p256_addcarryx_u32(&x421, &x422, x420, x400, x397);
  uint32_t x423;
  fiat_p256_uint1 x424;
  fiat_p256_addcarryx_u32(&x423, &x424, x422, x398, x395);
  uint32_t x425 = (x424 + x396);
  uint32_t x426;
  fiat_p256_uint1 x427;
  fiat_p256_addcarryx_u32(&x426, &x427, 0x0, x378, x409);
  uint32_t x428;
  fiat_p256_uint1 x429;
  fiat_p256_addcarryx_u32(&x428, &x429, x427, x380, x411);
  uint32_t x430;
  fiat_p256_uint1 x431;
  fiat_p256_addcarryx_u32(&x430, &x431, x429, x382, x413);
  uint32_t x432;
  fiat_p256_uint1 x433;
  fiat_p256_addcarryx_u32(&x432, &x433, x431, x384, x415);
  uint32_t x434;
  fiat_p256_uint1 x435;
  fiat_p256_addcarryx_u32(&x434, &x435, x433, x386, x417);
  uint32_t x436;
  fiat_p256_uint1 x437;
  fiat_p256_addcarryx_u32(&x436, &x437, x435, x388, x419);
  uint32_t x438;
  fiat_p256_uint1 x439;
  fiat_p256_addcarryx_u32(&x438, &x439, x437, x390, x421);
  uint32_t x440;
  fiat_p256_uint1 x441;
  fiat_p256_addcarryx_u32(&x440, &x441, x439, x392, x423);
  uint32_t x442;
  fiat_p256_uint1 x443;
  fiat_p256_addcarryx_u32(&x442, &x443, x441, x394, x425);
  uint32_t x444;
  uint32_t x445;
  fiat_p256_mulx_u32(&x444, &x445, x426, UINT32_C(0xffffffff));
  uint32_t x446;
  uint32_t x447;
  fiat_p256_mulx_u32(&x446, &x447, x426, UINT32_C(0xffffffff));
  uint32_t x448;
  uint32_t x449;
  fiat_p256_mulx_u32(&x448, &x449, x426, UINT32_C(0xffffffff));
  uint32_t x450;
  uint32_t x451;
  fiat_p256_mulx_u32(&x450, &x451, x426, UINT32_C(0xffffffff));
  uint32_t x452;
  fiat_p256_uint1 x453;
  fiat_p256_addcarryx_u32(&x452, &x453, 0x0, x451, x448);
  uint32_t x454;
  fiat_p256_uint1 x455;
  fiat_p256_addcarryx_u32(&x454, &x455, x453, x449, x446);
  uint32_t x456 = (x455 + x447);
  uint32_t x457;
  fiat_p256_uint1 x458;
  fiat_p256_addcarryx_u32(&x457, &x458, 0x0, x426, x450);
  uint32_t x459;
  fiat_p256_uint1 x460;
  fiat_p256_addcarryx_u32(&x459, &x460, x458, x428, x452);
  uint32_t x461;
  fiat_p256_uint1 x462;
  fiat_p256_addcarryx_u32(&x461, &x462, x460, x430, x454);
  uint32_t x463;
  fiat_p256_uint1 x464;
  fiat_p256_addcarryx_u32(&x463, &x464, x462, x432, x456);
  uint32_t x465;
  fiat_p256_uint1 x466;
  fiat_p256_addcarryx_u32(&x465, &x466, x464, x434, 0x0);
  uint32_t x467;
  fiat_p256_uint1 x468;
  fiat_p256_addcarryx_u32(&x467, &x468, x466, x436, 0x0);
  uint32_t x469;
  fiat_p256_uint1 x470;
  fiat_p256_addcarryx_u32(&x469, &x470, x468, x438, x426);
  uint32_t x471;
  fiat_p256_uint1 x472;
  fiat_p256_addcarryx_u32(&x471, &x472, x470, x440, x444);
  uint32_t x473;
  fiat_p256_uint1 x474;
  fiat_p256_addcarryx_u32(&x473, &x474, x472, x442, x445);
  uint32_t x475 = ((uint32_t)x474 + x443);
  uint32_t x476;
  uint32_t x477;
  fiat_p256_mulx_u32(&x476, &x477, x6, (arg1[7]));
  uint32_t x478;
  uint32_t x479;
  fiat_p256_mulx_u32(&x478, &x479, x6, (arg1[6]));
  uint32_t x480;
  uint32_t x481;
  fiat_p256_mulx_u32(&x480, &x481, x6, (arg1[5]));
  uint32_t x482;
  uint32_t x483;
  fiat_p256_mulx_u32(&x482, &x483, x6, (arg1[4]));
  uint32_t x484;
  uint32_t x485;
  fiat_p256_mulx_u32(&x484, &x485, x6, (arg1[3]));
  uint32_t x486;
  uint32_t x487;
  fiat_p256_mulx_u32(&x486, &x487, x6, (arg1[2]));
  uint32_t x488;
  uint32_t x489;
  fiat_p256_mulx_u32(&x488, &x489, x6, (arg1[1]));
  uint32_t x490;
  uint32_t x491;
  fiat_p256_mulx_u32(&x490, &x491, x6, (arg1[0]));
  uint32_t x492;
  fiat_p256_uint1 x493;
  fiat_p256_addcarryx_u32(&x492, &x493, 0x0, x491, x488);
  uint32_t x494;
  fiat_p256_uint1 x495;
  fiat_p256_addcarryx_u32(&x494, &x495, x493, x489, x486);
  uint32_t x496;
  fiat_p256_uint1 x497;
  fiat_p256_addcarryx_u32(&x496, &x497, x495, x487, x484);
  uint32_t x498;
  fiat_p256_uint1 x499;
  fiat_p256_addcarryx_u32(&x498, &x499, x497, x485, x482);
  uint32_t x500;
  fiat_p256_uint1 x501;
  fiat_p256_addcarryx_u32(&x500, &x501, x499, x483, x480);
  uint32_t x502;
  fiat_p256_uint1 x503;
  fiat_p256_addcarryx_u32(&x502, &x503, x501, x481, x478);
  uint32_t x504;
  fiat_p256_uint1 x505;
  fiat_p256_addcarryx_u32(&x504, &x505, x503, x479, x476);
  uint32_t x506 = (x505 + x477);
  uint32_t x507;
  fiat_p256_uint1 x508;
  fiat_p256_addcarryx_u32(&x507, &x508, 0x0, x459, x490);
  uint32_t x509;
  fiat_p256_uint1 x510;
  fiat_p256_addcarryx_u32(&x509, &x510, x508, x461, x492);
  uint32_t x511;
  fiat_p256_uint1 x512;
  fiat_p256_addcarryx_u32(&x511, &x512, x510, x463, x494);
  uint32_t x513;
  fiat_p256_uint1 x514;
  fiat_p256_addcarryx_u32(&x513, &x514, x512, x465, x496);
  uint32_t x515;
  fiat_p256_uint1 x516;
  fiat_p256_addcarryx_u32(&x515, &x516, x514, x467, x498);
  uint32_t x517;
  fiat_p256_uint1 x518;
  fiat_p256_addcarryx_u32(&x517, &x518, x516, x469, x500);
  uint32_t x519;
  fiat_p256_uint1 x520;
  fiat_p256_addcarryx_u32(&x519, &x520, x518, x471, x502);
  uint32_t x521;
  fiat_p256_uint1 x522;
  fiat_p256_addcarryx_u32(&x521, &x522, x520, x473, x504);
  uint32_t x523;
  fiat_p256_uint1 x524;
  fiat_p256_addcarryx_u32(&x523, &x524, x522, x475, x506);
  uint32_t x525;
  uint32_t x526;
  fiat_p256_mulx_u32(&x525, &x526, x507, UINT32_C(0xffffffff));
  uint32_t x527;
  uint32_t x528;
  fiat_p256_mulx_u32(&x527, &x528, x507, UINT32_C(0xffffffff));
  uint32_t x529;
  uint32_t x530;
  fiat_p256_mulx_u32(&x529, &x530, x507, UINT32_C(0xffffffff));
  uint32_t x531;
  uint32_t x532;
  fiat_p256_mulx_u32(&x531, &x532, x507, UINT32_C(0xffffffff));
  uint32_t x533;
  fiat_p256_uint1 x534;
  fiat_p256_addcarryx_u32(&x533, &x534, 0x0, x532, x529);
  uint32_t x535;
  fiat_p256_uint1 x536;
  fiat_p256_addcarryx_u32(&x535, &x536, x534, x530, x527);
  uint32_t x537 = (x536 + x528);
  uint32_t x538;
  fiat_p256_uint1 x539;
  fiat_p256_addcarryx_u32(&x538, &x539, 0x0, x507, x531);
  uint32_t x540;
  fiat_p256_uint1 x541;
  fiat_p256_addcarryx_u32(&x540, &x541, x539, x509, x533);
  uint32_t x542;
  fiat_p256_uint1 x543;
  fiat_p256_addcarryx_u32(&x542, &x543, x541, x511, x535);
  uint32_t x544;
  fiat_p256_uint1 x545;
  fiat_p256_addcarryx_u32(&x544, &x545, x543, x513, x537);
  uint32_t x546;
  fiat_p256_uint1 x547;
  fiat_p256_addcarryx_u32(&x546, &x547, x545, x515, 0x0);
  uint32_t x548;
  fiat_p256_uint1 x549;
  fiat_p256_addcarryx_u32(&x548, &x549, x547, x517, 0x0);
  uint32_t x550;
  fiat_p256_uint1 x551;
  fiat_p256_addcarryx_u32(&x550, &x551, x549, x519, x507);
  uint32_t x552;
  fiat_p256_uint1 x553;
  fiat_p256_addcarryx_u32(&x552, &x553, x551, x521, x525);
  uint32_t x554;
  fiat_p256_uint1 x555;
  fiat_p256_addcarryx_u32(&x554, &x555, x553, x523, x526);
  uint32_t x556 = ((uint32_t)x555 + x524);
  uint32_t x557;
  uint32_t x558;
  fiat_p256_mulx_u32(&x557, &x558, x7, (arg1[7]));
  uint32_t x559;
  uint32_t x560;
  fiat_p256_mulx_u32(&x559, &x560, x7, (arg1[6]));
  uint32_t x561;
  uint32_t x562;
  fiat_p256_mulx_u32(&x561, &x562, x7, (arg1[5]));
  uint32_t x563;
  uint32_t x564;
  fiat_p256_mulx_u32(&x563, &x564, x7, (arg1[4]));
  uint32_t x565;
  uint32_t x566;
  fiat_p256_mulx_u32(&x565, &x566, x7, (arg1[3]));
  uint32_t x567;
  uint32_t x568;
  fiat_p256_mulx_u32(&x567, &x568, x7, (arg1[2]));
  uint32_t x569;
  uint32_t x570;
  fiat_p256_mulx_u32(&x569, &x570, x7, (arg1[1]));
  uint32_t x571;
  uint32_t x572;
  fiat_p256_mulx_u32(&x571, &x572, x7, (arg1[0]));
  uint32_t x573;
  fiat_p256_uint1 x574;
  fiat_p256_addcarryx_u32(&x573, &x574, 0x0, x572, x569);
  uint32_t x575;
  fiat_p256_uint1 x576;
  fiat_p256_addcarryx_u32(&x575, &x576, x574, x570, x567);
  uint32_t x577;
  fiat_p256_uint1 x578;
  fiat_p256_addcarryx_u32(&x577, &x578, x576, x568, x565);
  uint32_t x579;
  fiat_p256_uint1 x580;
  fiat_p256_addcarryx_u32(&x579, &x580, x578, x566, x563);
  uint32_t x581;
  fiat_p256_uint1 x582;
  fiat_p256_addcarryx_u32(&x581, &x582, x580, x564, x561);
  uint32_t x583;
  fiat_p256_uint1 x584;
  fiat_p256_addcarryx_u32(&x583, &x584, x582, x562, x559);
  uint32_t x585;
  fiat_p256_uint1 x586;
  fiat_p256_addcarryx_u32(&x585, &x586, x584, x560, x557);
  uint32_t x587 = (x586 + x558);
  uint32_t x588;
  fiat_p256_uint1 x589;
  fiat_p256_addcarryx_u32(&x588, &x589, 0x0, x540, x571);
  uint32_t x590;
  fiat_p256_uint1 x591;
  fiat_p256_addcarryx_u32(&x590, &x591, x589, x542, x573);
  uint32_t x592;
  fiat_p256_uint1 x593;
  fiat_p256_addcarryx_u32(&x592, &x593, x591, x544, x575);
  uint32_t x594;
  fiat_p256_uint1 x595;
  fiat_p256_addcarryx_u32(&x594, &x595, x593, x546, x577);
  uint32_t x596;
  fiat_p256_uint1 x597;
  fiat_p256_addcarryx_u32(&x596, &x597, x595, x548, x579);
  uint32_t x598;
  fiat_p256_uint1 x599;
  fiat_p256_addcarryx_u32(&x598, &x599, x597, x550, x581);
  uint32_t x600;
  fiat_p256_uint1 x601;
  fiat_p256_addcarryx_u32(&x600, &x601, x599, x552, x583);
  uint32_t x602;
  fiat_p256_uint1 x603;
  fiat_p256_addcarryx_u32(&x602, &x603, x601, x554, x585);
  uint32_t x604;
  fiat_p256_uint1 x605;
  fiat_p256_addcarryx_u32(&x604, &x605, x603, x556, x587);
  uint32_t x606;
  uint32_t x607;
  fiat_p256_mulx_u32(&x606, &x607, x588, UINT32_C(0xffffffff));
  uint32_t x608;
  uint32_t x609;
  fiat_p256_mulx_u32(&x608, &x609, x588, UINT32_C(0xffffffff));
  uint32_t x610;
  uint32_t x611;
  fiat_p256_mulx_u32(&x610, &x611, x588, UINT32_C(0xffffffff));
  uint32_t x612;
  uint32_t x613;
  fiat_p256_mulx_u32(&x612, &x613, x588, UINT32_C(0xffffffff));
  uint32_t x614;
  fiat_p256_uint1 x615;
  fiat_p256_addcarryx_u32(&x614, &x615, 0x0, x613, x610);
  uint32_t x616;
  fiat_p256_uint1 x617;
  fiat_p256_addcarryx_u32(&x616, &x617, x615, x611, x608);
  uint32_t x618 = (x617 + x609);
  uint32_t x619;
  fiat_p256_uint1 x620;
  fiat_p256_addcarryx_u32(&x619, &x620, 0x0, x588, x612);
  uint32_t x621;
  fiat_p256_uint1 x622;
  fiat_p256_addcarryx_u32(&x621, &x622, x620, x590, x614);
  uint32_t x623;
  fiat_p256_uint1 x624;
  fiat_p256_addcarryx_u32(&x623, &x624, x622, x592, x616);
  uint32_t x625;
  fiat_p256_uint1 x626;
  fiat_p256_addcarryx_u32(&x625, &x626, x624, x594, x618);
  uint32_t x627;
  fiat_p256_uint1 x628;
  fiat_p256_addcarryx_u32(&x627, &x628, x626, x596, 0x0);
  uint32_t x629;
  fiat_p256_uint1 x630;
  fiat_p256_addcarryx_u32(&x629, &x630, x628, x598, 0x0);
  uint32_t x631;
  fiat_p256_uint1 x632;
  fiat_p256_addcarryx_u32(&x631, &x632, x630, x600, x588);
  uint32_t x633;
  fiat_p256_uint1 x634;
  fiat_p256_addcarryx_u32(&x633, &x634, x632, x602, x606);
  uint32_t x635;
  fiat_p256_uint1 x636;
  fiat_p256_addcarryx_u32(&x635, &x636, x634, x604, x607);
  uint32_t x637 = ((uint32_t)x636 + x605);
  uint32_t x638;
  fiat_p256_uint1 x639;
  fiat_p256_subborrowx_u32(&x638, &x639, 0x0, x621, UINT32_C(0xffffffff));
  uint32_t x640;
  fiat_p256_uint1 x641;
  fiat_p256_subborrowx_u32(&x640, &x641, x639, x623, UINT32_C(0xffffffff));
  uint32_t x642;
  fiat_p256_uint1 x643;
  fiat_p256_subborrowx_u32(&x642, &x643, x641, x625, UINT32_C(0xffffffff));
  uint32_t x644;
  fiat_p256_uint1 x645;
  fiat_p256_subborrowx_u32(&x644, &x645, x643, x627, 0x0);
  uint32_t x646;
  fiat_p256_uint1 x647;
  fiat_p256_subborrowx_u32(&x646, &x647, x645, x629, 0x0);
  uint32_t x648;
  fiat_p256_uint1 x649;
  fiat_p256_subborrowx_u32(&x648, &x649, x647, x631, 0x0);
  uint32_t x650;
  fiat_p256_uint1 x651;
  fiat_p256_subborrowx_u32(&x650, &x651, x649, x633, 0x1);
  uint32_t x652;
  fiat_p256_uint1 x653;
  fiat_p256_subborrowx_u32(&x652, &x653, x651, x635, UINT32_C(0xffffffff));
  uint32_t x654;
  fiat_p256_uint1 x655;
  fiat_p256_subborrowx_u32(&x654, &x655, x653, x637, 0x0);
  uint32_t x656;
  fiat_p256_cmovznz_u32(&x656, x655, x638, x621);
  uint32_t x657;
  fiat_p256_cmovznz_u32(&x657, x655, x640, x623);
  uint32_t x658;
  fiat_p256_cmovznz_u32(&x658, x655, x642, x625);
  uint32_t x659;
  fiat_p256_cmovznz_u32(&x659, x655, x644, x627);
  uint32_t x660;
  fiat_p256_cmovznz_u32(&x660, x655, x646, x629);
  uint32_t x661;
  fiat_p256_cmovznz_u32(&x661, x655, x648, x631);
  uint32_t x662;
  fiat_p256_cmovznz_u32(&x662, x655, x650, x633);
  uint32_t x663;
  fiat_p256_cmovznz_u32(&x663, x655, x652, x635);
  out1[0] = x656;
  out1[1] = x657;
  out1[2] = x658;
  out1[3] = x659;
  out1[4] = x660;
  out1[5] = x661;
  out1[6] = x662;
  out1[7] = x663;
}

/*
 * The function fiat_p256_add adds two field elements in the Montgomery domain.
 * Preconditions:
 *   0 ≤ eval arg1 < m
 *   0 ≤ eval arg2 < m
 * Postconditions:
 *   eval (from_montgomery out1) mod m = (eval (from_montgomery arg1) + eval (from_montgomery arg2)) mod m
 *   0 ≤ eval out1 < m
 *
 * Input Bounds:
 *   arg1: [[0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff]]
 *   arg2: [[0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff]]
 * Output Bounds:
 *   out1: [[0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff]]
 */
static void fiat_p256_add(uint32_t out1[8], const uint32_t arg1[8], const uint32_t arg2[8]) {
  uint32_t x1;
  fiat_p256_uint1 x2;
  fiat_p256_addcarryx_u32(&x1, &x2, 0x0, (arg1[0]), (arg2[0]));
  uint32_t x3;
  fiat_p256_uint1 x4;
  fiat_p256_addcarryx_u32(&x3, &x4, x2, (arg1[1]), (arg2[1]));
  uint32_t x5;
  fiat_p256_uint1 x6;
  fiat_p256_addcarryx_u32(&x5, &x6, x4, (arg1[2]), (arg2[2]));
  uint32_t x7;
  fiat_p256_uint1 x8;
  fiat_p256_addcarryx_u32(&x7, &x8, x6, (arg1[3]), (arg2[3]));
  uint32_t x9;
  fiat_p256_uint1 x10;
  fiat_p256_addcarryx_u32(&x9, &x10, x8, (arg1[4]), (arg2[4]));
  uint32_t x11;
  fiat_p256_uint1 x12;
  fiat_p256_addcarryx_u32(&x11, &x12, x10, (arg1[5]), (arg2[5]));
  uint32_t x13;
  fiat_p256_uint1 x14;
  fiat_p256_addcarryx_u32(&x13, &x14, x12, (arg1[6]), (arg2[6]));
  uint32_t x15;
  fiat_p256_uint1 x16;
  fiat_p256_addcarryx_u32(&x15, &x16, x14, (arg1[7]), (arg2[7]));
  uint32_t x17;
  fiat_p256_uint1 x18;
  fiat_p256_subborrowx_u32(&x17, &x18, 0x0, x1, UINT32_C(0xffffffff));
  uint32_t x19;
  fiat_p256_uint1 x20;
  fiat_p256_subborrowx_u32(&x19, &x20, x18, x3, UINT32_C(0xffffffff));
  uint32_t x21;
  fiat_p256_uint1 x22;
  fiat_p256_subborrowx_u32(&x21, &x22, x20, x5, UINT32_C(0xffffffff));
  uint32_t x23;
  fiat_p256_uint1 x24;
  fiat_p256_subborrowx_u32(&x23, &x24, x22, x7, 0x0);
  uint32_t x25;
  fiat_p256_uint1 x26;
  fiat_p256_subborrowx_u32(&x25, &x26, x24, x9, 0x0);
  uint32_t x27;
  fiat_p256_uint1 x28;
  fiat_p256_subborrowx_u32(&x27, &x28, x26, x11, 0x0);
  uint32_t x29;
  fiat_p256_uint1 x30;
  fiat_p256_subborrowx_u32(&x29, &x30, x28, x13, 0x1);
  uint32_t x31;
  fiat_p256_uint1 x32;
  fiat_p256_subborrowx_u32(&x31, &x32, x30, x15, UINT32_C(0xffffffff));
  uint32_t x33;
  fiat_p256_uint1 x34;
  fiat_p256_subborrowx_u32(&x33, &x34, x32, x16, 0x0);
  uint32_t x35;
  fiat_p256_cmovznz_u32(&x35, x34, x17, x1);
  uint32_t x36;
  fiat_p256_cmovznz_u32(&x36, x34, x19, x3);
  uint32_t x37;
  fiat_p256_cmovznz_u32(&x37, x34, x21, x5);
  uint32_t x38;
  fiat_p256_cmovznz_u32(&x38, x34, x23, x7);
  uint32_t x39;
  fiat_p256_cmovznz_u32(&x39, x34, x25, x9);
  uint32_t x40;
  fiat_p256_cmovznz_u32(&x40, x34, x27, x11);
  uint32_t x41;
  fiat_p256_cmovznz_u32(&x41, x34, x29, x13);
  uint32_t x42;
  fiat_p256_cmovznz_u32(&x42, x34, x31, x15);
  out1[0] = x35;
  out1[1] = x36;
  out1[2] = x37;
  out1[3] = x38;
  out1[4] = x39;
  out1[5] = x40;
  out1[6] = x41;
  out1[7] = x42;
}

/*
 * The function fiat_p256_sub subtracts two field elements in the Montgomery domain.
 * Preconditions:
 *   0 ≤ eval arg1 < m
 *   0 ≤ eval arg2 < m
 * Postconditions:
 *   eval (from_montgomery out1) mod m = (eval (from_montgomery arg1) - eval (from_montgomery arg2)) mod m
 *   0 ≤ eval out1 < m
 *
 * Input Bounds:
 *   arg1: [[0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff]]
 *   arg2: [[0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff]]
 * Output Bounds:
 *   out1: [[0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff]]
 */
static void fiat_p256_sub(uint32_t out1[8], const uint32_t arg1[8], const uint32_t arg2[8]) {
  uint32_t x1;
  fiat_p256_uint1 x2;
  fiat_p256_subborrowx_u32(&x1, &x2, 0x0, (arg1[0]), (arg2[0]));
  uint32_t x3;
  fiat_p256_uint1 x4;
  fiat_p256_subborrowx_u32(&x3, &x4, x2, (arg1[1]), (arg2[1]));
  uint32_t x5;
  fiat_p256_uint1 x6;
  fiat_p256_subborrowx_u32(&x5, &x6, x4, (arg1[2]), (arg2[2]));
  uint32_t x7;
  fiat_p256_uint1 x8;
  fiat_p256_subborrowx_u32(&x7, &x8, x6, (arg1[3]), (arg2[3]));
  uint32_t x9;
  fiat_p256_uint1 x10;
  fiat_p256_subborrowx_u32(&x9, &x10, x8, (arg1[4]), (arg2[4]));
  uint32_t x11;
  fiat_p256_uint1 x12;
  fiat_p256_subborrowx_u32(&x11, &x12, x10, (arg1[5]), (arg2[5]));
  uint32_t x13;
  fiat_p256_uint1 x14;
  fiat_p256_subborrowx_u32(&x13, &x14, x12, (arg1[6]), (arg2[6]));
  uint32_t x15;
  fiat_p256_uint1 x16;
  fiat_p256_subborrowx_u32(&x15, &x16, x14, (arg1[7]), (arg2[7]));
  uint32_t x17;
  fiat_p256_cmovznz_u32(&x17, x16, 0x0, UINT32_C(0xffffffff));
  uint32_t x18;
  fiat_p256_uint1 x19;
  fiat_p256_addcarryx_u32(&x18, &x19, 0x0, x1, (x17 & UINT32_C(0xffffffff)));
  uint32_t x20;
  fiat_p256_uint1 x21;
  fiat_p256_addcarryx_u32(&x20, &x21, x19, x3, (x17 & UINT32_C(0xffffffff)));
  uint32_t x22;
  fiat_p256_uint1 x23;
  fiat_p256_addcarryx_u32(&x22, &x23, x21, x5, (x17 & UINT32_C(0xffffffff)));
  uint32_t x24;
  fiat_p256_uint1 x25;
  fiat_p256_addcarryx_u32(&x24, &x25, x23, x7, 0x0);
  uint32_t x26;
  fiat_p256_uint1 x27;
  fiat_p256_addcarryx_u32(&x26, &x27, x25, x9, 0x0);
  uint32_t x28;
  fiat_p256_uint1 x29;
  fiat_p256_addcarryx_u32(&x28, &x29, x27, x11, 0x0);
  uint32_t x30;
  fiat_p256_uint1 x31;
  fiat_p256_addcarryx_u32(&x30, &x31, x29, x13, (fiat_p256_uint1)(x17 & 0x1));
  uint32_t x32;
  fiat_p256_uint1 x33;
  fiat_p256_addcarryx_u32(&x32, &x33, x31, x15, (x17 & UINT32_C(0xffffffff)));
  out1[0] = x18;
  out1[1] = x20;
  out1[2] = x22;
  out1[3] = x24;
  out1[4] = x26;
  out1[5] = x28;
  out1[6] = x30;
  out1[7] = x32;
}

/*
 * The function fiat_p256_opp negates a field element in the Montgomery domain.
 * Preconditions:
 *   0 ≤ eval arg1 < m
 * Postconditions:
 *   eval (from_montgomery out1) mod m = -eval (from_montgomery arg1) mod m
 *   0 ≤ eval out1 < m
 *
 * Input Bounds:
 *   arg1: [[0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff]]
 * Output Bounds:
 *   out1: [[0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff]]
 */
static void fiat_p256_opp(uint32_t out1[8], const uint32_t arg1[8]) {
  uint32_t x1;
  fiat_p256_uint1 x2;
  fiat_p256_subborrowx_u32(&x1, &x2, 0x0, 0x0, (arg1[0]));
  uint32_t x3;
  fiat_p256_uint1 x4;
  fiat_p256_subborrowx_u32(&x3, &x4, x2, 0x0, (arg1[1]));
  uint32_t x5;
  fiat_p256_uint1 x6;
  fiat_p256_subborrowx_u32(&x5, &x6, x4, 0x0, (arg1[2]));
  uint32_t x7;
  fiat_p256_uint1 x8;
  fiat_p256_subborrowx_u32(&x7, &x8, x6, 0x0, (arg1[3]));
  uint32_t x9;
  fiat_p256_uint1 x10;
  fiat_p256_subborrowx_u32(&x9, &x10, x8, 0x0, (arg1[4]));
  uint32_t x11;
  fiat_p256_uint1 x12;
  fiat_p256_subborrowx_u32(&x11, &x12, x10, 0x0, (arg1[5]));
  uint32_t x13;
  fiat_p256_uint1 x14;
  fiat_p256_subborrowx_u32(&x13, &x14, x12, 0x0, (arg1[6]));
  uint32_t x15;
  fiat_p256_uint1 x16;
  fiat_p256_subborrowx_u32(&x15, &x16, x14, 0x0, (arg1[7]));
  uint32_t x17;
  fiat_p256_cmovznz_u32(&x17, x16, 0x0, UINT32_C(0xffffffff));
  uint32_t x18;
  fiat_p256_uint1 x19;
  fiat_p256_addcarryx_u32(&x18, &x19, 0x0, x1, (x17 & UINT32_C(0xffffffff)));
  uint32_t x20;
  fiat_p256_uint1 x21;
  fiat_p256_addcarryx_u32(&x20, &x21, x19, x3, (x17 & UINT32_C(0xffffffff)));
  uint32_t x22;
  fiat_p256_uint1 x23;
  fiat_p256_addcarryx_u32(&x22, &x23, x21, x5, (x17 & UINT32_C(0xffffffff)));
  uint32_t x24;
  fiat_p256_uint1 x25;
  fiat_p256_addcarryx_u32(&x24, &x25, x23, x7, 0x0);
  uint32_t x26;
  fiat_p256_uint1 x27;
  fiat_p256_addcarryx_u32(&x26, &x27, x25, x9, 0x0);
  uint32_t x28;
  fiat_p256_uint1 x29;
  fiat_p256_addcarryx_u32(&x28, &x29, x27, x11, 0x0);
  uint32_t x30;
  fiat_p256_uint1 x31;
  fiat_p256_addcarryx_u32(&x30, &x31, x29, x13, (fiat_p256_uint1)(x17 & 0x1));
  uint32_t x32;
  fiat_p256_uint1 x33;
  fiat_p256_addcarryx_u32(&x32, &x33, x31, x15, (x17 & UINT32_C(0xffffffff)));
  out1[0] = x18;
  out1[1] = x20;
  out1[2] = x22;
  out1[3] = x24;
  out1[4] = x26;
  out1[5] = x28;
  out1[6] = x30;
  out1[7] = x32;
}

/*
 * The function fiat_p256_from_montgomery translates a field element out of the Montgomery domain.
 * Preconditions:
 *   0 ≤ eval arg1 < m
 * Postconditions:
 *   eval out1 mod m = (eval arg1 * ((2^32)⁻¹ mod m)^8) mod m
 *   0 ≤ eval out1 < m
 *
 * Input Bounds:
 *   arg1: [[0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff]]
 * Output Bounds:
 *   out1: [[0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff]]
 */
static void fiat_p256_from_montgomery(uint32_t out1[8], const uint32_t arg1[8]) {
  uint32_t x1 = (arg1[0]);
  uint32_t x2;
  uint32_t x3;
  fiat_p256_mulx_u32(&x2, &x3, x1, UINT32_C(0xffffffff));
  uint32_t x4;
  uint32_t x5;
  fiat_p256_mulx_u32(&x4, &x5, x1, UINT32_C(0xffffffff));
  uint32_t x6;
  uint32_t x7;
  fiat_p256_mulx_u32(&x6, &x7, x1, UINT32_C(0xffffffff));
  uint32_t x8;
  uint32_t x9;
  fiat_p256_mulx_u32(&x8, &x9, x1, UINT32_C(0xffffffff));
  uint32_t x10;
  fiat_p256_uint1 x11;
  fiat_p256_addcarryx_u32(&x10, &x11, 0x0, x9, x6);
  uint32_t x12;
  fiat_p256_uint1 x13;
  fiat_p256_addcarryx_u32(&x12, &x13, x11, x7, x4);
  uint32_t x14;
  fiat_p256_uint1 x15;
  fiat_p256_addcarryx_u32(&x14, &x15, 0x0, x1, x8);
  uint32_t x16;
  fiat_p256_uint1 x17;
  fiat_p256_addcarryx_u32(&x16, &x17, x15, 0x0, x10);
  uint32_t x18;
  fiat_p256_uint1 x19;
  fiat_p256_addcarryx_u32(&x18, &x19, x17, 0x0, x12);
  uint32_t x20;
  fiat_p256_uint1 x21;
  fiat_p256_addcarryx_u32(&x20, &x21, x19, 0x0, (x13 + x5));
  uint32_t x22;
  fiat_p256_uint1 x23;
  fiat_p256_addcarryx_u32(&x22, &x23, 0x0, x16, (arg1[1]));
  uint32_t x24;
  fiat_p256_uint1 x25;
  fiat_p256_addcarryx_u32(&x24, &x25, x23, x18, 0x0);
  uint32_t x26;
  fiat_p256_uint1 x27;
  fiat_p256_addcarryx_u32(&x26, &x27, x25, x20, 0x0);
  uint32_t x28;
  uint32_t x29;
  fiat_p256_mulx_u32(&x28, &x29, x22, UINT32_C(0xffffffff));
  uint32_t x30;
  uint32_t x31;
  fiat_p256_mulx_u32(&x30, &x31, x22, UINT32_C(0xffffffff));
  uint32_t x32;
  uint32_t x33;
  fiat_p256_mulx_u32(&x32, &x33, x22, UINT32_C(0xffffffff));
  uint32_t x34;
  uint32_t x35;
  fiat_p256_mulx_u32(&x34, &x35, x22, UINT32_C(0xffffffff));
  uint32_t x36;
  fiat_p256_uint1 x37;
  fiat_p256_addcarryx_u32(&x36, &x37, 0x0, x35, x32);
  uint32_t x38;
  fiat_p256_uint1 x39;
  fiat_p256_addcarryx_u32(&x38, &x39, x37, x33, x30);
  uint32_t x40;
  fiat_p256_uint1 x41;
  fiat_p256_addcarryx_u32(&x40, &x41, 0x0, x22, x34);
  uint32_t x42;
  fiat_p256_uint1 x43;
  fiat_p256_addcarryx_u32(&x42, &x43, x41, x24, x36);
  uint32_t x44;
  fiat_p256_uint1 x45;
  fiat_p256_addcarryx_u32(&x44, &x45, x43, x26, x38);
  uint32_t x46;
  fiat_p256_uint1 x47;
  fiat_p256_addcarryx_u32(&x46, &x47, x45, ((uint32_t)x27 + x21), (x39 + x31));
  uint32_t x48;
  fiat_p256_uint1 x49;
  fiat_p256_addcarryx_u32(&x48, &x49, 0x0, x2, x22);
  uint32_t x50;
  fiat_p256_uint1 x51;
  fiat_p256_addcarryx_u32(&x50, &x51, x49, x3, x28);
  uint32_t x52;
  fiat_p256_uint1 x53;
  fiat_p256_addcarryx_u32(&x52, &x53, 0x0, x42, (arg1[2]));
  uint32_t x54;
  fiat_p256_uint1 x55;
  fiat_p256_addcarryx_u32(&x54, &x55, x53, x44, 0x0);
  uint32_t x56;
  fiat_p256_uint1 x57;
  fiat_p256_addcarryx_u32(&x56, &x57, x55, x46, 0x0);
  uint32_t x58;
  uint32_t x59;
  fiat_p256_mulx_u32(&x58, &x59, x52, UINT32_C(0xffffffff));
  uint32_t x60;
  uint32_t x61;
  fiat_p256_mulx_u32(&x60, &x61, x52, UINT32_C(0xffffffff));
  uint32_t x62;
  uint32_t x63;
  fiat_p256_mulx_u32(&x62, &x63, x52, UINT32_C(0xffffffff));
  uint32_t x64;
  uint32_t x65;
  fiat_p256_mulx_u32(&x64, &x65, x52, UINT32_C(0xffffffff));
  uint32_t x66;
  fiat_p256_uint1 x67;
  fiat_p256_addcarryx_u32(&x66, &x67, 0x0, x65, x62);
  uint32_t x68;
  fiat_p256_uint1 x69;
  fiat_p256_addcarryx_u32(&x68, &x69, x67, x63, x60);
  uint32_t x70;
  fiat_p256_uint1 x71;
  fiat_p256_addcarryx_u32(&x70, &x71, 0x0, x52, x64);
  uint32_t x72;
  fiat_p256_uint1 x73;
  fiat_p256_addcarryx_u32(&x72, &x73, x71, x54, x66);
  uint32_t x74;
  fiat_p256_uint1 x75;
  fiat_p256_addcarryx_u32(&x74, &x75, x73, x56, x68);
  uint32_t x76;
  fiat_p256_uint1 x77;
  fiat_p256_addcarryx_u32(&x76, &x77, x75, ((uint32_t)x57 + x47), (x69 + x61));
  uint32_t x78;
  fiat_p256_uint1 x79;
  fiat_p256_addcarryx_u32(&x78, &x79, x77, x1, 0x0);
  uint32_t x80;
  fiat_p256_uint1 x81;
  fiat_p256_addcarryx_u32(&x80, &x81, x79, x48, 0x0);
  uint32_t x82;
  fiat_p256_uint1 x83;
  fiat_p256_addcarryx_u32(&x82, &x83, x81, x50, x52);
  uint32_t x84;
  fiat_p256_uint1 x85;
  fiat_p256_addcarryx_u32(&x84, &x85, x83, (x51 + x29), x58);
  uint32_t x86;
  fiat_p256_uint1 x87;
  fiat_p256_addcarryx_u32(&x86, &x87, 0x0, x72, (arg1[3]));
  uint32_t x88;
  fiat_p256_uint1 x89;
  fiat_p256_addcarryx_u32(&x88, &x89, x87, x74, 0x0);
  uint32_t x90;
  fiat_p256_uint1 x91;
  fiat_p256_addcarryx_u32(&x90, &x91, x89, x76, 0x0);
  uint32_t x92;
  fiat_p256_uint1 x93;
  fiat_p256_addcarryx_u32(&x92, &x93, x91, x78, 0x0);
  uint32_t x94;
  fiat_p256_uint1 x95;
  fiat_p256_addcarryx_u32(&x94, &x95, x93, x80, 0x0);
  uint32_t x96;
  fiat_p256_uint1 x97;
  fiat_p256_addcarryx_u32(&x96, &x97, x95, x82, 0x0);
  uint32_t x98;
  fiat_p256_uint1 x99;
  fiat_p256_addcarryx_u32(&x98, &x99, x97, x84, 0x0);
  uint32_t x100;
  fiat_p256_uint1 x101;
  fiat_p256_addcarryx_u32(&x100, &x101, x99, (x85 + x59), 0x0);
  uint32_t x102;
  uint32_t x103;
  fiat_p256_mulx_u32(&x102, &x103, x86, UINT32_C(0xffffffff));
  uint32_t x104;
  uint32_t x105;
  fiat_p256_mulx_u32(&x104, &x105, x86, UINT32_C(0xffffffff));
  uint32_t x106;
  uint32_t x107;
  fiat_p256_mulx_u32(&x106, &x107, x86, UINT32_C(0xffffffff));
  uint32_t x108;
  uint32_t x109;
  fiat_p256_mulx_u32(&x108, &x109, x86, UINT32_C(0xffffffff));
  uint32_t x110;
  fiat_p256_uint1 x111;
  fiat_p256_addcarryx_u32(&x110, &x111, 0x0, x109, x106);
  uint32_t x112;
  fiat_p256_uint1 x113;
  fiat_p256_addcarryx_u32(&x112, &x113, x111, x107, x104);
  uint32_t x114;
  fiat_p256_uint1 x115;
  fiat_p256_addcarryx_u32(&x114, &x115, 0x0, x86, x108);
  uint32_t x116;
  fiat_p256_uint1 x117;
  fiat_p256_addcarryx_u32(&x116, &x117, x115, x88, x110);
  uint32_t x118;
  fiat_p256_uint1 x119;
  fiat_p256_addcarryx_u32(&x118, &x119, x117, x90, x112);
  uint32_t x120;
  fiat_p256_uint1 x121;
  fiat_p256_addcarryx_u32(&x120, &x121, x119, x92, (x113 + x105));
  uint32_t x122;
  fiat_p256_uint1 x123;
  fiat_p256_addcarryx_u32(&x122, &x123, x121, x94, 0x0);
  uint32_t x124;
  fiat_p256_uint1 x125;
  fiat_p256_addcarryx_u32(&x124, &x125, x123, x96, 0x0);
  uint32_t x126;
  fiat_p256_uint1 x127;
  fiat_p256_addcarryx_u32(&x126, &x127, x125, x98, x86);
  uint32_t x128;
  fiat_p256_uint1 x129;
  fiat_p256_addcarryx_u32(&x128, &x129, x127, x100, x102);
  uint32_t x130;
  fiat_p256_uint1 x131;
  fiat_p256_addcarryx_u32(&x130, &x131, x129, x101, x103);
  uint32_t x132;
  fiat_p256_uint1 x133;
  fiat_p256_addcarryx_u32(&x132, &x133, 0x0, x116, (arg1[4]));
  uint32_t x134;
  fiat_p256_uint1 x135;
  fiat_p256_addcarryx_u32(&x134, &x135, x133, x118, 0x0);
  uint32_t x136;
  fiat_p256_uint1 x137;
  fiat_p256_addcarryx_u32(&x136, &x137, x135, x120, 0x0);
  uint32_t x138;
  fiat_p256_uint1 x139;
  fiat_p256_addcarryx_u32(&x138, &x139, x137, x122, 0x0);
  uint32_t x140;
  fiat_p256_uint1 x141;
  fiat_p256_addcarryx_u32(&x140, &x141, x139, x124, 0x0);
  uint32_t x142;
  fiat_p256_uint1 x143;
  fiat_p256_addcarryx_u32(&x142, &x143, x141, x126, 0x0);
  uint32_t x144;
  fiat_p256_uint1 x145;
  fiat_p256_addcarryx_u32(&x144, &x145, x143, x128, 0x0);
  uint32_t x146;
  fiat_p256_uint1 x147;
  fiat_p256_addcarryx_u32(&x146, &x147, x145, x130, 0x0);
  uint32_t x148;
  uint32_t x149;
  fiat_p256_mulx_u32(&x148, &x149, x132, UINT32_C(0xffffffff));
  uint32_t x150;
  uint32_t x151;
  fiat_p256_mulx_u32(&x150, &x151, x132, UINT32_C(0xffffffff));
  uint32_t x152;
  uint32_t x153;
  fiat_p256_mulx_u32(&x152, &x153, x132, UINT32_C(0xffffffff));
  uint32_t x154;
  uint32_t x155;
  fiat_p256_mulx_u32(&x154, &x155, x132, UINT32_C(0xffffffff));
  uint32_t x156;
  fiat_p256_uint1 x157;
  fiat_p256_addcarryx_u32(&x156, &x157, 0x0, x155, x152);
  uint32_t x158;
  fiat_p256_uint1 x159;
  fiat_p256_addcarryx_u32(&x158, &x159, x157, x153, x150);
  uint32_t x160;
  fiat_p256_uint1 x161;
  fiat_p256_addcarryx_u32(&x160, &x161, 0x0, x132, x154);
  uint32_t x162;
  fiat_p256_uint1 x163;
  fiat_p256_addcarryx_u32(&x162, &x163, x161, x134, x156);
  uint32_t x164;
  fiat_p256_uint1 x165;
  fiat_p256_addcarryx_u32(&x164, &x165, x163, x136, x158);
  uint32_t x166;
  fiat_p256_uint1 x167;
  fiat_p256_addcarryx_u32(&x166, &x167, x165, x138, (x159 + x151));
  uint32_t x168;
  fiat_p256_uint1 x169;
  fiat_p256_addcarryx_u32(&x168, &x169, x167, x140, 0x0);
  uint32_t x170;
  fiat_p256_uint1 x171;
  fiat_p256_addcarryx_u32(&x170, &x171, x169, x142, 0x0);
  uint32_t x172;
  fiat_p256_uint1 x173;
  fiat_p256_addcarryx_u32(&x172, &x173, x171, x144, x132);
  uint32_t x174;
  fiat_p256_uint1 x175;
  fiat_p256_addcarryx_u32(&x174, &x175, x173, x146, x148);
  uint32_t x176;
  fiat_p256_uint1 x177;
  fiat_p256_addcarryx_u32(&x176, &x177, x175, ((uint32_t)x147 + x131), x149);
  uint32_t x178;
  fiat_p256_uint1 x179;
  fiat_p256_addcarryx_u32(&x178, &x179, 0x0, x162, (arg1[5]));
  uint32_t x180;
  fiat_p256_uint1 x181;
  fiat_p256_addcarryx_u32(&x180, &x181, x179, x164, 0x0);
  uint32_t x182;
  fiat_p256_uint1 x183;
  fiat_p256_addcarryx_u32(&x182, &x183, x181, x166, 0x0);
  uint32_t x184;
  fiat_p256_uint1 x185;
  fiat_p256_addcarryx_u32(&x184, &x185, x183, x168, 0x0);
  uint32_t x186;
  fiat_p256_uint1 x187;
  fiat_p256_addcarryx_u32(&x186, &x187, x185, x170, 0x0);
  uint32_t x188;
  fiat_p256_uint1 x189;
  fiat_p256_addcarryx_u32(&x188, &x189, x187, x172, 0x0);
  uint32_t x190;
  fiat_p256_uint1 x191;
  fiat_p256_addcarryx_u32(&x190, &x191, x189, x174, 0x0);
  uint32_t x192;
  fiat_p256_uint1 x193;
  fiat_p256_addcarryx_u32(&x192, &x193, x191, x176, 0x0);
  uint32_t x194;
  uint32_t x195;
  fiat_p256_mulx_u32(&x194, &x195, x178, UINT32_C(0xffffffff));
  uint32_t x196;
  uint32_t x197;
  fiat_p256_mulx_u32(&x196, &x197, x178, UINT32_C(0xffffffff));
  uint32_t x198;
  uint32_t x199;
  fiat_p256_mulx_u32(&x198, &x199, x178, UINT32_C(0xffffffff));
  uint32_t x200;
  uint32_t x201;
  fiat_p256_mulx_u32(&x200, &x201, x178, UINT32_C(0xffffffff));
  uint32_t x202;
  fiat_p256_uint1 x203;
  fiat_p256_addcarryx_u32(&x202, &x203, 0x0, x201, x198);
  uint32_t x204;
  fiat_p256_uint1 x205;
  fiat_p256_addcarryx_u32(&x204, &x205, x203, x199, x196);
  uint32_t x206;
  fiat_p256_uint1 x207;
  fiat_p256_addcarryx_u32(&x206, &x207, 0x0, x178, x200);
  uint32_t x208;
  fiat_p256_uint1 x209;
  fiat_p256_addcarryx_u32(&x208, &x209, x207, x180, x202);
  uint32_t x210;
  fiat_p256_uint1 x211;
  fiat_p256_addcarryx_u32(&x210, &x211, x209, x182, x204);
  uint32_t x212;
  fiat_p256_uint1 x213;
  fiat_p256_addcarryx_u32(&x212, &x213, x211, x184, (x205 + x197));
  uint32_t x214;
  fiat_p256_uint1 x215;
  fiat_p256_addcarryx_u32(&x214, &x215, x213, x186, 0x0);
  uint32_t x216;
  fiat_p256_uint1 x217;
  fiat_p256_addcarryx_u32(&x216, &x217, x215, x188, 0x0);
  uint32_t x218;
  fiat_p256_uint1 x219;
  fiat_p256_addcarryx_u32(&x218, &x219, x217, x190, x178);
  uint32_t x220;
  fiat_p256_uint1 x221;
  fiat_p256_addcarryx_u32(&x220, &x221, x219, x192, x194);
  uint32_t x222;
  fiat_p256_uint1 x223;
  fiat_p256_addcarryx_u32(&x222, &x223, x221, ((uint32_t)x193 + x177), x195);
  uint32_t x224;
  fiat_p256_uint1 x225;
  fiat_p256_addcarryx_u32(&x224, &x225, 0x0, x208, (arg1[6]));
  uint32_t x226;
  fiat_p256_uint1 x227;
  fiat_p256_addcarryx_u32(&x226, &x227, x225, x210, 0x0);
  uint32_t x228;
  fiat_p256_uint1 x229;
  fiat_p256_addcarryx_u32(&x228, &x229, x227, x212, 0x0);
  uint32_t x230;
  fiat_p256_uint1 x231;
  fiat_p256_addcarryx_u32(&x230, &x231, x229, x214, 0x0);
  uint32_t x232;
  fiat_p256_uint1 x233;
  fiat_p256_addcarryx_u32(&x232, &x233, x231, x216, 0x0);
  uint32_t x234;
  fiat_p256_uint1 x235;
  fiat_p256_addcarryx_u32(&x234, &x235, x233, x218, 0x0);
  uint32_t x236;
  fiat_p256_uint1 x237;
  fiat_p256_addcarryx_u32(&x236, &x237, x235, x220, 0x0);
  uint32_t x238;
  fiat_p256_uint1 x239;
  fiat_p256_addcarryx_u32(&x238, &x239, x237, x222, 0x0);
  uint32_t x240;
  uint32_t x241;
  fiat_p256_mulx_u32(&x240, &x241, x224, UINT32_C(0xffffffff));
  uint32_t x242;
  uint32_t x243;
  fiat_p256_mulx_u32(&x242, &x243, x224, UINT32_C(0xffffffff));
  uint32_t x244;
  uint32_t x245;
  fiat_p256_mulx_u32(&x244, &x245, x224, UINT32_C(0xffffffff));
  uint32_t x246;
  uint32_t x247;
  fiat_p256_mulx_u32(&x246, &x247, x224, UINT32_C(0xffffffff));
  uint32_t x248;
  fiat_p256_uint1 x249;
  fiat_p256_addcarryx_u32(&x248, &x249, 0x0, x247, x244);
  uint32_t x250;
  fiat_p256_uint1 x251;
  fiat_p256_addcarryx_u32(&x250, &x251, x249, x245, x242);
  uint32_t x252;
  fiat_p256_uint1 x253;
  fiat_p256_addcarryx_u32(&x252, &x253, 0x0, x224, x246);
  uint32_t x254;
  fiat_p256_uint1 x255;
  fiat_p256_addcarryx_u32(&x254, &x255, x253, x226, x248);
  uint32_t x256;
  fiat_p256_uint1 x257;
  fiat_p256_addcarryx_u32(&x256, &x257, x255, x228, x250);
  uint32_t x258;
  fiat_p256_uint1 x259;
  fiat_p256_addcarryx_u32(&x258, &x259, x257, x230, (x251 + x243));
  uint32_t x260;
  fiat_p256_uint1 x261;
  fiat_p256_addcarryx_u32(&x260, &x261, x259, x232, 0x0);
  uint32_t x262;
  fiat_p256_uint1 x263;
  fiat_p256_addcarryx_u32(&x262, &x263, x261, x234, 0x0);
  uint32_t x264;
  fiat_p256_uint1 x265;
  fiat_p256_addcarryx_u32(&x264, &x265, x263, x236, x224);
  uint32_t x266;
  fiat_p256_uint1 x267;
  fiat_p256_addcarryx_u32(&x266, &x267, x265, x238, x240);
  uint32_t x268;
  fiat_p256_uint1 x269;
  fiat_p256_addcarryx_u32(&x268, &x269, x267, ((uint32_t)x239 + x223), x241);
  uint32_t x270;
  fiat_p256_uint1 x271;
  fiat_p256_addcarryx_u32(&x270, &x271, 0x0, x254, (arg1[7]));
  uint32_t x272;
  fiat_p256_uint1 x273;
  fiat_p256_addcarryx_u32(&x272, &x273, x271, x256, 0x0);
  uint32_t x274;
  fiat_p256_uint1 x275;
  fiat_p256_addcarryx_u32(&x274, &x275, x273, x258, 0x0);
  uint32_t x276;
  fiat_p256_uint1 x277;
  fiat_p256_addcarryx_u32(&x276, &x277, x275, x260, 0x0);
  uint32_t x278;
  fiat_p256_uint1 x279;
  fiat_p256_addcarryx_u32(&x278, &x279, x277, x262, 0x0);
  uint32_t x280;
  fiat_p256_uint1 x281;
  fiat_p256_addcarryx_u32(&x280, &x281, x279, x264, 0x0);
  uint32_t x282;
  fiat_p256_uint1 x283;
  fiat_p256_addcarryx_u32(&x282, &x283, x281, x266, 0x0);
  uint32_t x284;
  fiat_p256_uint1 x285;
  fiat_p256_addcarryx_u32(&x284, &x285, x283, x268, 0x0);
  uint32_t x286;
  uint32_t x287;
  fiat_p256_mulx_u32(&x286, &x287, x270, UINT32_C(0xffffffff));
  uint32_t x288;
  uint32_t x289;
  fiat_p256_mulx_u32(&x288, &x289, x270, UINT32_C(0xffffffff));
  uint32_t x290;
  uint32_t x291;
  fiat_p256_mulx_u32(&x290, &x291, x270, UINT32_C(0xffffffff));
  uint32_t x292;
  uint32_t x293;
  fiat_p256_mulx_u32(&x292, &x293, x270, UINT32_C(0xffffffff));
  uint32_t x294;
  fiat_p256_uint1 x295;
  fiat_p256_addcarryx_u32(&x294, &x295, 0x0, x293, x290);
  uint32_t x296;
  fiat_p256_uint1 x297;
  fiat_p256_addcarryx_u32(&x296, &x297, x295, x291, x288);
  uint32_t x298;
  fiat_p256_uint1 x299;
  fiat_p256_addcarryx_u32(&x298, &x299, 0x0, x270, x292);
  uint32_t x300;
  fiat_p256_uint1 x301;
  fiat_p256_addcarryx_u32(&x300, &x301, x299, x272, x294);
  uint32_t x302;
  fiat_p256_uint1 x303;
  fiat_p256_addcarryx_u32(&x302, &x303, x301, x274, x296);
  uint32_t x304;
  fiat_p256_uint1 x305;
  fiat_p256_addcarryx_u32(&x304, &x305, x303, x276, (x297 + x289));
  uint32_t x306;
  fiat_p256_uint1 x307;
  fiat_p256_addcarryx_u32(&x306, &x307, x305, x278, 0x0);
  uint32_t x308;
  fiat_p256_uint1 x309;
  fiat_p256_addcarryx_u32(&x308, &x309, x307, x280, 0x0);
  uint32_t x310;
  fiat_p256_uint1 x311;
  fiat_p256_addcarryx_u32(&x310, &x311, x309, x282, x270);
  uint32_t x312;
  fiat_p256_uint1 x313;
  fiat_p256_addcarryx_u32(&x312, &x313, x311, x284, x286);
  uint32_t x314;
  fiat_p256_uint1 x315;
  fiat_p256_addcarryx_u32(&x314, &x315, x313, ((uint32_t)x285 + x269), x287);
  uint32_t x316;
  fiat_p256_uint1 x317;
  fiat_p256_subborrowx_u32(&x316, &x317, 0x0, x300, UINT32_C(0xffffffff));
  uint32_t x318;
  fiat_p256_uint1 x319;
  fiat_p256_subborrowx_u32(&x318, &x319, x317, x302, UINT32_C(0xffffffff));
  uint32_t x320;
  fiat_p256_uint1 x321;
  fiat_p256_subborrowx_u32(&x320, &x321, x319, x304, UINT32_C(0xffffffff));
  uint32_t x322;
  fiat_p256_uint1 x323;
  fiat_p256_subborrowx_u32(&x322, &x323, x321, x306, 0x0);
  uint32_t x324;
  fiat_p256_uint1 x325;
  fiat_p256_subborrowx_u32(&x324, &x325, x323, x308, 0x0);
  uint32_t x326;
  fiat_p256_uint1 x327;
  fiat_p256_subborrowx_u32(&x326, &x327, x325, x310, 0x0);
  uint32_t x328;
  fiat_p256_uint1 x329;
  fiat_p256_subborrowx_u32(&x328, &x329, x327, x312, 0x1);
  uint32_t x330;
  fiat_p256_uint1 x331;
  fiat_p256_subborrowx_u32(&x330, &x331, x329, x314, UINT32_C(0xffffffff));
  uint32_t x332;
  fiat_p256_uint1 x333;
  fiat_p256_subborrowx_u32(&x332, &x333, x331, x315, 0x0);
  uint32_t x334;
  fiat_p256_cmovznz_u32(&x334, x333, x316, x300);
  uint32_t x335;
  fiat_p256_cmovznz_u32(&x335, x333, x318, x302);
  uint32_t x336;
  fiat_p256_cmovznz_u32(&x336, x333, x320, x304);
  uint32_t x337;
  fiat_p256_cmovznz_u32(&x337, x333, x322, x306);
  uint32_t x338;
  fiat_p256_cmovznz_u32(&x338, x333, x324, x308);
  uint32_t x339;
  fiat_p256_cmovznz_u32(&x339, x333, x326, x310);
  uint32_t x340;
  fiat_p256_cmovznz_u32(&x340, x333, x328, x312);
  uint32_t x341;
  fiat_p256_cmovznz_u32(&x341, x333, x330, x314);
  out1[0] = x334;
  out1[1] = x335;
  out1[2] = x336;
  out1[3] = x337;
  out1[4] = x338;
  out1[5] = x339;
  out1[6] = x340;
  out1[7] = x341;
}

/*
 * The function fiat_p256_nonzero outputs a single non-zero word if the input is non-zero and zero otherwise.
 * Preconditions:
 *   0 ≤ eval arg1 < m
 * Postconditions:
 *   out1 = 0 ↔ eval (from_montgomery arg1) mod m = 0
 *
 * Input Bounds:
 *   arg1: [[0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff]]
 * Output Bounds:
 *   out1: [0x0 ~> 0xffffffff]
 */
static void fiat_p256_nonzero(uint32_t* out1, const uint32_t arg1[8]) {
  uint32_t x1 = ((arg1[0]) | ((arg1[1]) | ((arg1[2]) | ((arg1[3]) | ((arg1[4]) | ((arg1[5]) | ((arg1[6]) | ((arg1[7]) | (uint32_t)0x0))))))));
  *out1 = x1;
}

/*
 * The function fiat_p256_selectznz is a multi-limb conditional select.
 * Postconditions:
 *   eval out1 = (if arg1 = 0 then eval arg2 else eval arg3)
 *
 * Input Bounds:
 *   arg1: [0x0 ~> 0x1]
 *   arg2: [[0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff]]
 *   arg3: [[0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff]]
 * Output Bounds:
 *   out1: [[0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff]]
 */
static void fiat_p256_selectznz(uint32_t out1[8], fiat_p256_uint1 arg1, const uint32_t arg2[8], const uint32_t arg3[8]) {
  uint32_t x1;
  fiat_p256_cmovznz_u32(&x1, arg1, (arg2[0]), (arg3[0]));
  uint32_t x2;
  fiat_p256_cmovznz_u32(&x2, arg1, (arg2[1]), (arg3[1]));
  uint32_t x3;
  fiat_p256_cmovznz_u32(&x3, arg1, (arg2[2]), (arg3[2]));
  uint32_t x4;
  fiat_p256_cmovznz_u32(&x4, arg1, (arg2[3]), (arg3[3]));
  uint32_t x5;
  fiat_p256_cmovznz_u32(&x5, arg1, (arg2[4]), (arg3[4]));
  uint32_t x6;
  fiat_p256_cmovznz_u32(&x6, arg1, (arg2[5]), (arg3[5]));
  uint32_t x7;
  fiat_p256_cmovznz_u32(&x7, arg1, (arg2[6]), (arg3[6]));
  uint32_t x8;
  fiat_p256_cmovznz_u32(&x8, arg1, (arg2[7]), (arg3[7]));
  out1[0] = x1;
  out1[1] = x2;
  out1[2] = x3;
  out1[3] = x4;
  out1[4] = x5;
  out1[5] = x6;
  out1[6] = x7;
  out1[7] = x8;
}

/*
 * The function fiat_p256_to_bytes serializes a field element in the Montgomery domain to bytes in little-endian order.
 * Preconditions:
 *   0 ≤ eval arg1 < m
 * Postconditions:
 *   out1 = map (λ x, ⌊((eval arg1 mod m) mod 2^(8 * (x + 1))) / 2^(8 * x)⌋) [0..31]
 *
 * Input Bounds:
 *   arg1: [[0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff]]
 * Output Bounds:
 *   out1: [[0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff]]
 */
static void fiat_p256_to_bytes(uint8_t out1[32], const uint32_t arg1[8]) {
  uint32_t x1 = (arg1[7]);
  uint32_t x2 = (arg1[6]);
  uint32_t x3 = (arg1[5]);
  uint32_t x4 = (arg1[4]);
  uint32_t x5 = (arg1[3]);
  uint32_t x6 = (arg1[2]);
  uint32_t x7 = (arg1[1]);
  uint32_t x8 = (arg1[0]);
  uint32_t x9 = (x8 >> 8);
  uint8_t x10 = (uint8_t)(x8 & UINT8_C(0xff));
  uint32_t x11 = (x9 >> 8);
  uint8_t x12 = (uint8_t)(x9 & UINT8_C(0xff));
  uint8_t x13 = (uint8_t)(x11 >> 8);
  uint8_t x14 = (uint8_t)(x11 & UINT8_C(0xff));
  uint8_t x15 = (uint8_t)(x13 & UINT8_C(0xff));
  uint32_t x16 = (x7 >> 8);
  uint8_t x17 = (uint8_t)(x7 & UINT8_C(0xff));
  uint32_t x18 = (x16 >> 8);
  uint8_t x19 = (uint8_t)(x16 & UINT8_C(0xff));
  uint8_t x20 = (uint8_t)(x18 >> 8);
  uint8_t x21 = (uint8_t)(x18 & UINT8_C(0xff));
  uint8_t x22 = (uint8_t)(x20 & UINT8_C(0xff));
  uint32_t x23 = (x6 >> 8);
  uint8_t x24 = (uint8_t)(x6 & UINT8_C(0xff));
  uint32_t x25 = (x23 >> 8);
  uint8_t x26 = (uint8_t)(x23 & UINT8_C(0xff));
  uint8_t x27 = (uint8_t)(x25 >> 8);
  uint8_t x28 = (uint8_t)(x25 & UINT8_C(0xff));
  uint8_t x29 = (uint8_t)(x27 & UINT8_C(0xff));
  uint32_t x30 = (x5 >> 8);
  uint8_t x31 = (uint8_t)(x5 & UINT8_C(0xff));
  uint32_t x32 = (x30 >> 8);
  uint8_t x33 = (uint8_t)(x30 & UINT8_C(0xff));
  uint8_t x34 = (uint8_t)(x32 >> 8);
  uint8_t x35 = (uint8_t)(x32 & UINT8_C(0xff));
  uint8_t x36 = (uint8_t)(x34 & UINT8_C(0xff));
  uint32_t x37 = (x4 >> 8);
  uint8_t x38 = (uint8_t)(x4 & UINT8_C(0xff));
  uint32_t x39 = (x37 >> 8);
  uint8_t x40 = (uint8_t)(x37 & UINT8_C(0xff));
  uint8_t x41 = (uint8_t)(x39 >> 8);
  uint8_t x42 = (uint8_t)(x39 & UINT8_C(0xff));
  uint8_t x43 = (uint8_t)(x41 & UINT8_C(0xff));
  uint32_t x44 = (x3 >> 8);
  uint8_t x45 = (uint8_t)(x3 & UINT8_C(0xff));
  uint32_t x46 = (x44 >> 8);
  uint8_t x47 = (uint8_t)(x44 & UINT8_C(0xff));
  uint8_t x48 = (uint8_t)(x46 >> 8);
  uint8_t x49 = (uint8_t)(x46 & UINT8_C(0xff));
  uint8_t x50 = (uint8_t)(x48 & UINT8_C(0xff));
  uint32_t x51 = (x2 >> 8);
  uint8_t x52 = (uint8_t)(x2 & UINT8_C(0xff));
  uint32_t x53 = (x51 >> 8);
  uint8_t x54 = (uint8_t)(x51 & UINT8_C(0xff));
  uint8_t x55 = (uint8_t)(x53 >> 8);
  uint8_t x56 = (uint8_t)(x53 & UINT8_C(0xff));
  uint8_t x57 = (uint8_t)(x55 & UINT8_C(0xff));
  uint32_t x58 = (x1 >> 8);
  uint8_t x59 = (uint8_t)(x1 & UINT8_C(0xff));
  uint32_t x60 = (x58 >> 8);
  uint8_t x61 = (uint8_t)(x58 & UINT8_C(0xff));
  uint8_t x62 = (uint8_t)(x60 >> 8);
  uint8_t x63 = (uint8_t)(x60 & UINT8_C(0xff));
  out1[0] = x10;
  out1[1] = x12;
  out1[2] = x14;
  out1[3] = x15;
  out1[4] = x17;
  out1[5] = x19;
  out1[6] = x21;
  out1[7] = x22;
  out1[8] = x24;
  out1[9] = x26;
  out1[10] = x28;
  out1[11] = x29;
  out1[12] = x31;
  out1[13] = x33;
  out1[14] = x35;
  out1[15] = x36;
  out1[16] = x38;
  out1[17] = x40;
  out1[18] = x42;
  out1[19] = x43;
  out1[20] = x45;
  out1[21] = x47;
  out1[22] = x49;
  out1[23] = x50;
  out1[24] = x52;
  out1[25] = x54;
  out1[26] = x56;
  out1[27] = x57;
  out1[28] = x59;
  out1[29] = x61;
  out1[30] = x63;
  out1[31] = x62;
}

/*
 * The function fiat_p256_from_bytes deserializes a field element in the Montgomery domain from bytes in little-endian order.
 * Preconditions:
 *   0 ≤ bytes_eval arg1 < m
 * Postconditions:
 *   eval out1 mod m = bytes_eval arg1 mod m
 *   0 ≤ eval out1 < m
 *
 * Input Bounds:
 *   arg1: [[0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff]]
 * Output Bounds:
 *   out1: [[0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff]]
 */
static void fiat_p256_from_bytes(uint32_t out1[8], const uint8_t arg1[32]) {
  uint32_t x1 = ((uint32_t)(arg1[31]) << 24);
  uint32_t x2 = ((uint32_t)(arg1[30]) << 16);
  uint32_t x3 = ((uint32_t)(arg1[29]) << 8);
  uint8_t x4 = (arg1[28]);
  uint32_t x5 = ((uint32_t)(arg1[27]) << 24);
  uint32_t x6 = ((uint32_t)(arg1[26]) << 16);
  uint32_t x7 = ((uint32_t)(arg1[25]) << 8);
  uint8_t x8 = (arg1[24]);
  uint32_t x9 = ((uint32_t)(arg1[23]) << 24);
  uint32_t x10 = ((uint32_t)(arg1[22]) << 16);
  uint32_t x11 = ((uint32_t)(arg1[21]) << 8);
  uint8_t x12 = (arg1[20]);
  uint32_t x13 = ((uint32_t)(arg1[19]) << 24);
  uint32_t x14 = ((uint32_t)(arg1[18]) << 16);
  uint32_t x15 = ((uint32_t)(arg1[17]) << 8);
  uint8_t x16 = (arg1[16]);
  uint32_t x17 = ((uint32_t)(arg1[15]) << 24);
  uint32_t x18 = ((uint32_t)(arg1[14]) << 16);
  uint32_t x19 = ((uint32_t)(arg1[13]) << 8);
  uint8_t x20 = (arg1[12]);
  uint32_t x21 = ((uint32_t)(arg1[11]) << 24);
  uint32_t x22 = ((uint32_t)(arg1[10]) << 16);
  uint32_t x23 = ((uint32_t)(arg1[9]) << 8);
  uint8_t x24 = (arg1[8]);
  uint32_t x25 = ((uint32_t)(arg1[7]) << 24);
  uint32_t x26 = ((uint32_t)(arg1[6]) << 16);
  uint32_t x27 = ((uint32_t)(arg1[5]) << 8);
  uint8_t x28 = (arg1[4]);
  uint32_t x29 = ((uint32_t)(arg1[3]) << 24);
  uint32_t x30 = ((uint32_t)(arg1[2]) << 16);
  uint32_t x31 = ((uint32_t)(arg1[1]) << 8);
  uint8_t x32 = (arg1[0]);
  uint32_t x33 = (x32 + (x31 + (x30 + x29)));
  uint32_t x34 = (x33 & UINT32_C(0xffffffff));
  uint32_t x35 = (x4 + (x3 + (x2 + x1)));
  uint32_t x36 = (x8 + (x7 + (x6 + x5)));
  uint32_t x37 = (x12 + (x11 + (x10 + x9)));
  uint32_t x38 = (x16 + (x15 + (x14 + x13)));
  uint32_t x39 = (x20 + (x19 + (x18 + x17)));
  uint32_t x40 = (x24 + (x23 + (x22 + x21)));
  uint32_t x41 = (x28 + (x27 + (x26 + x25)));
  uint32_t x42 = (x41 & UINT32_C(0xffffffff));
  uint32_t x43 = (x40 & UINT32_C(0xffffffff));
  uint32_t x44 = (x39 & UINT32_C(0xffffffff));
  uint32_t x45 = (x38 & UINT32_C(0xffffffff));
  uint32_t x46 = (x37 & UINT32_C(0xffffffff));
  uint32_t x47 = (x36 & UINT32_C(0xffffffff));
  out1[0] = x34;
  out1[1] = x42;
  out1[2] = x43;
  out1[3] = x44;
  out1[4] = x45;
  out1[5] = x46;
  out1[6] = x47;
  out1[7] = x35;
}

